xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e2ee2a47cae8248a2ea96b9d6f35ff3f37ecfc00)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for direct calls from Fortran (Used in PETSc-fun3d)
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590   stepval = (n-1)*4;
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     rp   = aj + ai[row];
594     ap   = aa + 16*ai[row];
595     nrow = ailen[row];
596     low  = 0;
597     high = nrow;
598     for (l=0; l<n; l++) { /* loop over added columns */
599       col = in[l];
600       if (col <= lastcol) low = 0; else high = nrow;
601       lastcol = col;
602       value = v + k*(stepval+4 + l)*4;
603       while (high-low > 7) {
604         t = (low+high)/2;
605         if (rp[t] > col) high = t;
606         else             low  = t;
607       }
608       for (i=low; i<high; i++) {
609         if (rp[i] > col) break;
610         if (rp[i] == col) {
611           bap  = ap +  16*i;
612           for (ii=0; ii<4; ii++,value+=stepval) {
613             for (jj=ii; jj<16; jj+=4) {
614               bap[jj] += *value++;
615             }
616           }
617           goto noinsert2;
618         }
619       }
620       N = nrow++ - 1;
621       high++; /* added new column index thus must search to one higher than before */
622       /* shift up all the later entries in this row */
623       for (ii=N; ii>=i; ii--) {
624         rp[ii+1] = rp[ii];
625         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626       }
627       if (N >= i) {
628         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629       }
630       rp[i] = col;
631       bap   = ap +  16*i;
632       for (ii=0; ii<4; ii++,value+=stepval) {
633         for (jj=ii; jj<16; jj+=4) {
634           bap[jj] = *value++;
635         }
636       }
637       noinsert2:;
638       low = i;
639     }
640     ailen[row] = nrow;
641   }
642   PetscFunctionReturnVoid();
643 }
644 EXTERN_C_END
645 
646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
647 #define matsetvalues4_ MATSETVALUES4
648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649 #define matsetvalues4_ matsetvalues4
650 #endif
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatSetValues4_"
655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656 {
657   Mat         A = *AA;
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660   PetscInt    *ai=a->i,*ailen=a->ilen;
661   PetscInt    *aj=a->j,brow,bcol;
662   PetscInt    ridx,cidx,lastcol = -1;
663   MatScalar   *ap,value,*aa=a->a,*bap;
664 
665   PetscFunctionBegin;
666   for (k=0; k<m; k++) { /* loop over added rows */
667     row  = im[k]; brow = row/4;
668     rp   = aj + ai[brow];
669     ap   = aa + 16*ai[brow];
670     nrow = ailen[brow];
671     low  = 0;
672     high = nrow;
673     for (l=0; l<n; l++) { /* loop over added columns */
674       col = in[l]; bcol = col/4;
675       ridx = row % 4; cidx = col % 4;
676       value = v[l + k*n];
677       if (col <= lastcol) low = 0; else high = nrow;
678       lastcol = col;
679       while (high-low > 7) {
680         t = (low+high)/2;
681         if (rp[t] > bcol) high = t;
682         else              low  = t;
683       }
684       for (i=low; i<high; i++) {
685         if (rp[i] > bcol) break;
686         if (rp[i] == bcol) {
687           bap  = ap +  16*i + 4*cidx + ridx;
688           *bap += value;
689           goto noinsert1;
690         }
691       }
692       N = nrow++ - 1;
693       high++; /* added new column thus must search to one higher than before */
694       /* shift up all the later entries in this row */
695       for (ii=N; ii>=i; ii--) {
696         rp[ii+1] = rp[ii];
697         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698       }
699       if (N>=i) {
700         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701       }
702       rp[i]                    = bcol;
703       ap[16*i + 4*cidx + ridx] = value;
704       noinsert1:;
705       low = i;
706     }
707     ailen[brow] = nrow;
708   }
709   PetscFunctionReturnVoid();
710 }
711 EXTERN_C_END
712 
713 /*  UGLY, ugly, ugly
714    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717    converts the entries into single precision and then calls ..._MatScalar() to put them
718    into the single precision data structures.
719 */
720 #if defined(PETSC_USE_MAT_SINGLE)
721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722 #else
723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724 #endif
725 
726 #define CHUNKSIZE  10
727 
728 /*
729      Checks for missing diagonals
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734 {
735   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
736   PetscErrorCode ierr;
737   PetscInt       *diag,*jj = a->j,i;
738 
739   PetscFunctionBegin;
740   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
741   diag = a->diag;
742   for (i=0; i<a->mbs; i++) {
743     if (jj[diag[i]] != i) {
744       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745     }
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753 {
754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
755   PetscErrorCode ierr;
756   PetscInt       i,j,*diag,m = a->mbs;
757 
758   PetscFunctionBegin;
759   if (a->diag) PetscFunctionReturn(0);
760 
761   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
762   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
763   for (i=0; i<m; i++) {
764     diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         diag[i] = j;
768         break;
769       }
770     }
771   }
772   a->diag = diag;
773   PetscFunctionReturn(0);
774 }
775 
776 
777 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
781 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
782 {
783   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
784   PetscErrorCode ierr;
785   PetscInt       n = a->mbs,i;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
792   } else if (oshift == 1) {
793     /* temporarily add 1 to i and j indices */
794     PetscInt nz = a->i[n];
795     for (i=0; i<nz; i++) a->j[i]++;
796     for (i=0; i<n+1; i++) a->i[i]++;
797     *ia = a->i; *ja = a->j;
798   } else {
799     *ia = a->i; *ja = a->j;
800   }
801 
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
807 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
808 {
809   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
810   PetscErrorCode ierr;
811   PetscInt       i,n = a->mbs;
812 
813   PetscFunctionBegin;
814   if (!ia) PetscFunctionReturn(0);
815   if (symmetric) {
816     ierr = PetscFree(*ia);CHKERRQ(ierr);
817     ierr = PetscFree(*ja);CHKERRQ(ierr);
818   } else if (oshift == 1) {
819     PetscInt nz = a->i[n]-1;
820     for (i=0; i<nz; i++) a->j[i]--;
821     for (i=0; i<n+1; i++) a->i[i]--;
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNCT__
827 #define __FUNCT__ "MatDestroy_SeqBAIJ"
828 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
829 {
830   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
831   PetscErrorCode ierr;
832 
833   PetscFunctionBegin;
834 #if defined(PETSC_USE_LOG)
835   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
836 #endif
837   if (a->freedata){
838     ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
839   }
840   if (a->row) {
841     ierr = ISDestroy(a->row);CHKERRQ(ierr);
842   }
843   if (a->col) {
844     ierr = ISDestroy(a->col);CHKERRQ(ierr);
845   }
846   ierr = PetscFree(a->diag);CHKERRQ(ierr);
847   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
848   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
849   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
850   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
851   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
852   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
853 #if defined(PETSC_USE_MAT_SINGLE)
854   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
855 #endif
856   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
857   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
858 
859   ierr = PetscFree(a);CHKERRQ(ierr);
860 
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
866   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
867   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "MatSetOption_SeqBAIJ"
873 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
874 {
875   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
876   PetscErrorCode ierr;
877 
878   PetscFunctionBegin;
879   switch (op) {
880   case MAT_ROW_ORIENTED:
881     a->roworiented    = PETSC_TRUE;
882     break;
883   case MAT_COLUMN_ORIENTED:
884     a->roworiented    = PETSC_FALSE;
885     break;
886   case MAT_COLUMNS_SORTED:
887     a->sorted         = PETSC_TRUE;
888     break;
889   case MAT_COLUMNS_UNSORTED:
890     a->sorted         = PETSC_FALSE;
891     break;
892   case MAT_KEEP_ZEROED_ROWS:
893     a->keepzeroedrows = PETSC_TRUE;
894     break;
895   case MAT_NO_NEW_NONZERO_LOCATIONS:
896     a->nonew          = 1;
897     break;
898   case MAT_NEW_NONZERO_LOCATION_ERR:
899     a->nonew          = -1;
900     break;
901   case MAT_NEW_NONZERO_ALLOCATION_ERR:
902     a->nonew          = -2;
903     break;
904   case MAT_YES_NEW_NONZERO_LOCATIONS:
905     a->nonew          = 0;
906     break;
907   case MAT_ROWS_SORTED:
908   case MAT_ROWS_UNSORTED:
909   case MAT_YES_NEW_DIAGONALS:
910   case MAT_IGNORE_OFF_PROC_ENTRIES:
911   case MAT_USE_HASH_TABLE:
912     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
913     break;
914   case MAT_NO_NEW_DIAGONALS:
915     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
916   case MAT_SYMMETRIC:
917   case MAT_STRUCTURALLY_SYMMETRIC:
918   case MAT_NOT_SYMMETRIC:
919   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
920   case MAT_HERMITIAN:
921   case MAT_NOT_HERMITIAN:
922   case MAT_SYMMETRY_ETERNAL:
923   case MAT_NOT_SYMMETRY_ETERNAL:
924     break;
925   default:
926     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
927   }
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatGetRow_SeqBAIJ"
933 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
934 {
935   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
938   MatScalar      *aa,*aa_i;
939   PetscScalar    *v_i;
940 
941   PetscFunctionBegin;
942   bs  = A->rmap.bs;
943   ai  = a->i;
944   aj  = a->j;
945   aa  = a->a;
946   bs2 = a->bs2;
947 
948   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
949 
950   bn  = row/bs;   /* Block number */
951   bp  = row % bs; /* Block Position */
952   M   = ai[bn+1] - ai[bn];
953   *nz = bs*M;
954 
955   if (v) {
956     *v = 0;
957     if (*nz) {
958       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
959       for (i=0; i<M; i++) { /* for each block in the block row */
960         v_i  = *v + i*bs;
961         aa_i = aa + bs2*(ai[bn] + i);
962         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
963       }
964     }
965   }
966 
967   if (idx) {
968     *idx = 0;
969     if (*nz) {
970       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
971       for (i=0; i<M; i++) { /* for each block in the block row */
972         idx_i = *idx + i*bs;
973         itmp  = bs*aj[ai[bn] + i];
974         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
975       }
976     }
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
983 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
984 {
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
989   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatTranspose_SeqBAIJ"
995 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
996 {
997   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
998   Mat            C;
999   PetscErrorCode ierr;
1000   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1001   PetscInt       *rows,*cols,bs2=a->bs2;
1002   PetscScalar    *array;
1003 
1004   PetscFunctionBegin;
1005   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1006   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1007   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1008 
1009 #if defined(PETSC_USE_MAT_SINGLE)
1010   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1011   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1012 #else
1013   array = a->a;
1014 #endif
1015 
1016   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1017   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1018   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1019   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1020   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1021   ierr = PetscFree(col);CHKERRQ(ierr);
1022   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1023   cols = rows + bs;
1024   for (i=0; i<mbs; i++) {
1025     cols[0] = i*bs;
1026     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1027     len = ai[i+1] - ai[i];
1028     for (j=0; j<len; j++) {
1029       rows[0] = (*aj++)*bs;
1030       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1031       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1032       array += bs2;
1033     }
1034   }
1035   ierr = PetscFree(rows);CHKERRQ(ierr);
1036 #if defined(PETSC_USE_MAT_SINGLE)
1037   ierr = PetscFree(array);
1038 #endif
1039 
1040   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042 
1043   if (B) {
1044     *B = C;
1045   } else {
1046     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1053 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1054 {
1055   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1056   PetscErrorCode ierr;
1057   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1058   int            fd;
1059   PetscScalar    *aa;
1060   FILE           *file;
1061 
1062   PetscFunctionBegin;
1063   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1064   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1065   col_lens[0] = MAT_FILE_COOKIE;
1066 
1067   col_lens[1] = A->rmap.N;
1068   col_lens[2] = A->cmap.n;
1069   col_lens[3] = a->nz*bs2;
1070 
1071   /* store lengths of each row and write (including header) to file */
1072   count = 0;
1073   for (i=0; i<a->mbs; i++) {
1074     for (j=0; j<bs; j++) {
1075       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1076     }
1077   }
1078   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1079   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1080 
1081   /* store column indices (zero start index) */
1082   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1083   count = 0;
1084   for (i=0; i<a->mbs; i++) {
1085     for (j=0; j<bs; j++) {
1086       for (k=a->i[i]; k<a->i[i+1]; k++) {
1087         for (l=0; l<bs; l++) {
1088           jj[count++] = bs*a->j[k] + l;
1089         }
1090       }
1091     }
1092   }
1093   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1094   ierr = PetscFree(jj);CHKERRQ(ierr);
1095 
1096   /* store nonzero values */
1097   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1098   count = 0;
1099   for (i=0; i<a->mbs; i++) {
1100     for (j=0; j<bs; j++) {
1101       for (k=a->i[i]; k<a->i[i+1]; k++) {
1102         for (l=0; l<bs; l++) {
1103           aa[count++] = a->a[bs2*k + l*bs + j];
1104         }
1105       }
1106     }
1107   }
1108   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1109   ierr = PetscFree(aa);CHKERRQ(ierr);
1110 
1111   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1112   if (file) {
1113     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1120 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1121 {
1122   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1123   PetscErrorCode    ierr;
1124   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1125   PetscViewerFormat format;
1126 
1127   PetscFunctionBegin;
1128   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1129   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1130     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1131   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1132     Mat aij;
1133     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1134     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1135     ierr = MatDestroy(aij);CHKERRQ(ierr);
1136   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1137      PetscFunctionReturn(0);
1138   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1139     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1140     for (i=0; i<a->mbs; i++) {
1141       for (j=0; j<bs; j++) {
1142         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1143         for (k=a->i[i]; k<a->i[i+1]; k++) {
1144           for (l=0; l<bs; l++) {
1145 #if defined(PETSC_USE_COMPLEX)
1146             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1148                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1151                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1152             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1153               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1154             }
1155 #else
1156             if (a->a[bs2*k + l*bs + j] != 0.0) {
1157               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1158             }
1159 #endif
1160           }
1161         }
1162         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1163       }
1164     }
1165     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1166   } else {
1167     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1168     for (i=0; i<a->mbs; i++) {
1169       for (j=0; j<bs; j++) {
1170         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1171         for (k=a->i[i]; k<a->i[i+1]; k++) {
1172           for (l=0; l<bs; l++) {
1173 #if defined(PETSC_USE_COMPLEX)
1174             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1175               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1176                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1179                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1180             } else {
1181               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1182             }
1183 #else
1184             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1185 #endif
1186           }
1187         }
1188         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1189       }
1190     }
1191     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1192   }
1193   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1199 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1200 {
1201   Mat            A = (Mat) Aa;
1202   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1205   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1206   MatScalar      *aa;
1207   PetscViewer    viewer;
1208 
1209   PetscFunctionBegin;
1210 
1211   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1212   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1213 
1214   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1215 
1216   /* loop over matrix elements drawing boxes */
1217   color = PETSC_DRAW_BLUE;
1218   for (i=0,row=0; i<mbs; i++,row+=bs) {
1219     for (j=a->i[i]; j<a->i[i+1]; j++) {
1220       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1221       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1222       aa = a->a + j*bs2;
1223       for (k=0; k<bs; k++) {
1224         for (l=0; l<bs; l++) {
1225           if (PetscRealPart(*aa++) >=  0.) continue;
1226           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1227         }
1228       }
1229     }
1230   }
1231   color = PETSC_DRAW_CYAN;
1232   for (i=0,row=0; i<mbs; i++,row+=bs) {
1233     for (j=a->i[i]; j<a->i[i+1]; j++) {
1234       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1235       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1236       aa = a->a + j*bs2;
1237       for (k=0; k<bs; k++) {
1238         for (l=0; l<bs; l++) {
1239           if (PetscRealPart(*aa++) != 0.) continue;
1240           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1241         }
1242       }
1243     }
1244   }
1245 
1246   color = PETSC_DRAW_RED;
1247   for (i=0,row=0; i<mbs; i++,row+=bs) {
1248     for (j=a->i[i]; j<a->i[i+1]; j++) {
1249       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1250       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1251       aa = a->a + j*bs2;
1252       for (k=0; k<bs; k++) {
1253         for (l=0; l<bs; l++) {
1254           if (PetscRealPart(*aa++) <= 0.) continue;
1255           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1256         }
1257       }
1258     }
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1265 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1266 {
1267   PetscErrorCode ierr;
1268   PetscReal      xl,yl,xr,yr,w,h;
1269   PetscDraw      draw;
1270   PetscTruth     isnull;
1271 
1272   PetscFunctionBegin;
1273 
1274   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1275   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1276 
1277   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1278   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1279   xr += w;    yr += h;  xl = -w;     yl = -h;
1280   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1281   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1282   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatView_SeqBAIJ"
1288 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1289 {
1290   PetscErrorCode ierr;
1291   PetscTruth     iascii,isbinary,isdraw;
1292 
1293   PetscFunctionBegin;
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1296   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1297   if (iascii){
1298     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1299   } else if (isbinary) {
1300     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1301   } else if (isdraw) {
1302     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1303   } else {
1304     Mat B;
1305     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1306     ierr = MatView(B,viewer);CHKERRQ(ierr);
1307     ierr = MatDestroy(B);CHKERRQ(ierr);
1308   }
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1315 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1316 {
1317   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1318   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1319   PetscInt    *ai = a->i,*ailen = a->ilen;
1320   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1321   MatScalar   *ap,*aa = a->a,zero = 0.0;
1322 
1323   PetscFunctionBegin;
1324   for (k=0; k<m; k++) { /* loop over rows */
1325     row  = im[k]; brow = row/bs;
1326     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1327     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1328     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1329     nrow = ailen[brow];
1330     for (l=0; l<n; l++) { /* loop over columns */
1331       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1332       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1333       col  = in[l] ;
1334       bcol = col/bs;
1335       cidx = col%bs;
1336       ridx = row%bs;
1337       high = nrow;
1338       low  = 0; /* assume unsorted */
1339       while (high-low > 5) {
1340         t = (low+high)/2;
1341         if (rp[t] > bcol) high = t;
1342         else             low  = t;
1343       }
1344       for (i=low; i<high; i++) {
1345         if (rp[i] > bcol) break;
1346         if (rp[i] == bcol) {
1347           *v++ = ap[bs2*i+bs*cidx+ridx];
1348           goto finished;
1349         }
1350       }
1351       *v++ = zero;
1352       finished:;
1353     }
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #if defined(PETSC_USE_MAT_SINGLE)
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1361 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1362 {
1363   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1364   PetscErrorCode ierr;
1365   PetscInt       i,N = m*n*b->bs2;
1366   MatScalar      *vsingle;
1367 
1368   PetscFunctionBegin;
1369   if (N > b->setvalueslen) {
1370     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1371     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1372     b->setvalueslen  = N;
1373   }
1374   vsingle = b->setvaluescopy;
1375   for (i=0; i<N; i++) {
1376     vsingle[i] = v[i];
1377   }
1378   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 #endif
1382 
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1386 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1387 {
1388   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1389   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1390   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1391   PetscErrorCode  ierr;
1392   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1393   PetscTruth      roworiented=a->roworiented;
1394   const MatScalar *value = v;
1395   MatScalar       *ap,*aa = a->a,*bap;
1396 
1397   PetscFunctionBegin;
1398   if (roworiented) {
1399     stepval = (n-1)*bs;
1400   } else {
1401     stepval = (m-1)*bs;
1402   }
1403   for (k=0; k<m; k++) { /* loop over added rows */
1404     row  = im[k];
1405     if (row < 0) continue;
1406 #if defined(PETSC_USE_DEBUG)
1407     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1408 #endif
1409     rp   = aj + ai[row];
1410     ap   = aa + bs2*ai[row];
1411     rmax = imax[row];
1412     nrow = ailen[row];
1413     low  = 0;
1414     high = nrow;
1415     for (l=0; l<n; l++) { /* loop over added columns */
1416       if (in[l] < 0) continue;
1417 #if defined(PETSC_USE_DEBUG)
1418       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1419 #endif
1420       col = in[l];
1421       if (roworiented) {
1422         value = v + k*(stepval+bs)*bs + l*bs;
1423       } else {
1424         value = v + l*(stepval+bs)*bs + k*bs;
1425       }
1426       if (col <= lastcol) low = 0; else high = nrow;
1427       lastcol = col;
1428       while (high-low > 7) {
1429         t = (low+high)/2;
1430         if (rp[t] > col) high = t;
1431         else             low  = t;
1432       }
1433       for (i=low; i<high; i++) {
1434         if (rp[i] > col) break;
1435         if (rp[i] == col) {
1436           bap  = ap +  bs2*i;
1437           if (roworiented) {
1438             if (is == ADD_VALUES) {
1439               for (ii=0; ii<bs; ii++,value+=stepval) {
1440                 for (jj=ii; jj<bs2; jj+=bs) {
1441                   bap[jj] += *value++;
1442                 }
1443               }
1444             } else {
1445               for (ii=0; ii<bs; ii++,value+=stepval) {
1446                 for (jj=ii; jj<bs2; jj+=bs) {
1447                   bap[jj] = *value++;
1448                 }
1449               }
1450             }
1451           } else {
1452             if (is == ADD_VALUES) {
1453               for (ii=0; ii<bs; ii++,value+=stepval) {
1454                 for (jj=0; jj<bs; jj++) {
1455                   *bap++ += *value++;
1456                 }
1457               }
1458             } else {
1459               for (ii=0; ii<bs; ii++,value+=stepval) {
1460                 for (jj=0; jj<bs; jj++) {
1461                   *bap++  = *value++;
1462                 }
1463               }
1464             }
1465           }
1466           goto noinsert2;
1467         }
1468       }
1469       if (nonew == 1) goto noinsert2;
1470       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1471       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1472       N = nrow++ - 1; high++;
1473       /* shift up all the later entries in this row */
1474       for (ii=N; ii>=i; ii--) {
1475         rp[ii+1] = rp[ii];
1476         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1477       }
1478       if (N >= i) {
1479         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1480       }
1481       rp[i] = col;
1482       bap   = ap +  bs2*i;
1483       if (roworiented) {
1484         for (ii=0; ii<bs; ii++,value+=stepval) {
1485           for (jj=ii; jj<bs2; jj+=bs) {
1486             bap[jj] = *value++;
1487           }
1488         }
1489       } else {
1490         for (ii=0; ii<bs; ii++,value+=stepval) {
1491           for (jj=0; jj<bs; jj++) {
1492             *bap++  = *value++;
1493           }
1494         }
1495       }
1496       noinsert2:;
1497       low = i;
1498     }
1499     ailen[row] = nrow;
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1506 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1507 {
1508   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1509   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1510   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1511   PetscErrorCode ierr;
1512   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1513   MatScalar      *aa = a->a,*ap;
1514   PetscReal      ratio=0.6;
1515 
1516   PetscFunctionBegin;
1517   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1518 
1519   if (m) rmax = ailen[0];
1520   for (i=1; i<mbs; i++) {
1521     /* move each row back by the amount of empty slots (fshift) before it*/
1522     fshift += imax[i-1] - ailen[i-1];
1523     rmax   = PetscMax(rmax,ailen[i]);
1524     if (fshift) {
1525       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1526       N = ailen[i];
1527       for (j=0; j<N; j++) {
1528         ip[j-fshift] = ip[j];
1529         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1530       }
1531     }
1532     ai[i] = ai[i-1] + ailen[i-1];
1533   }
1534   if (mbs) {
1535     fshift += imax[mbs-1] - ailen[mbs-1];
1536     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1537   }
1538   /* reset ilen and imax for each row */
1539   for (i=0; i<mbs; i++) {
1540     ailen[i] = imax[i] = ai[i+1] - ai[i];
1541   }
1542   a->nz = ai[mbs];
1543 
1544   /* diagonals may have moved, so kill the diagonal pointers */
1545   a->idiagvalid = PETSC_FALSE;
1546   if (fshift && a->diag) {
1547     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1548     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1549     a->diag = 0;
1550   }
1551   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1552   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1553   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1554   a->reallocs          = 0;
1555   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1556 
1557   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1558   if (a->compressedrow.use){
1559     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1560   }
1561 
1562   A->same_nonzero = PETSC_TRUE;
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 /*
1567    This function returns an array of flags which indicate the locations of contiguous
1568    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1569    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1570    Assume: sizes should be long enough to hold all the values.
1571 */
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1574 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1575 {
1576   PetscInt   i,j,k,row;
1577   PetscTruth flg;
1578 
1579   PetscFunctionBegin;
1580   for (i=0,j=0; i<n; j++) {
1581     row = idx[i];
1582     if (row%bs!=0) { /* Not the begining of a block */
1583       sizes[j] = 1;
1584       i++;
1585     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1586       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1587       i++;
1588     } else { /* Begining of the block, so check if the complete block exists */
1589       flg = PETSC_TRUE;
1590       for (k=1; k<bs; k++) {
1591         if (row+k != idx[i+k]) { /* break in the block */
1592           flg = PETSC_FALSE;
1593           break;
1594         }
1595       }
1596       if (flg) { /* No break in the bs */
1597         sizes[j] = bs;
1598         i+= bs;
1599       } else {
1600         sizes[j] = 1;
1601         i++;
1602       }
1603     }
1604   }
1605   *bs_max = j;
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1611 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1612 {
1613   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1614   PetscErrorCode ierr;
1615   PetscInt       i,j,k,count,*rows;
1616   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1617   PetscScalar    zero = 0.0;
1618   MatScalar      *aa;
1619 
1620   PetscFunctionBegin;
1621   /* Make a copy of the IS and  sort it */
1622   /* allocate memory for rows,sizes */
1623   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1624   sizes = rows + is_n;
1625 
1626   /* copy IS values to rows, and sort them */
1627   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1628   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1629   if (baij->keepzeroedrows) {
1630     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1631     bs_max = is_n;
1632     A->same_nonzero = PETSC_TRUE;
1633   } else {
1634     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1635     A->same_nonzero = PETSC_FALSE;
1636   }
1637 
1638   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1639     row   = rows[j];
1640     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1641     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1642     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1643     if (sizes[i] == bs && !baij->keepzeroedrows) {
1644       if (diag != 0.0) {
1645         if (baij->ilen[row/bs] > 0) {
1646           baij->ilen[row/bs]       = 1;
1647           baij->j[baij->i[row/bs]] = row/bs;
1648           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1649         }
1650         /* Now insert all the diagonal values for this bs */
1651         for (k=0; k<bs; k++) {
1652           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1653         }
1654       } else { /* (diag == 0.0) */
1655         baij->ilen[row/bs] = 0;
1656       } /* end (diag == 0.0) */
1657     } else { /* (sizes[i] != bs) */
1658 #if defined (PETSC_USE_DEBUG)
1659       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1660 #endif
1661       for (k=0; k<count; k++) {
1662         aa[0] =  zero;
1663         aa    += bs;
1664       }
1665       if (diag != 0.0) {
1666         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1667       }
1668     }
1669   }
1670 
1671   ierr = PetscFree(rows);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1679 {
1680   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1681   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1682   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1683   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1684   PetscErrorCode ierr;
1685   PetscInt       ridx,cidx,bs2=a->bs2;
1686   PetscTruth     roworiented=a->roworiented;
1687   MatScalar      *ap,value,*aa=a->a,*bap;
1688 
1689   PetscFunctionBegin;
1690   for (k=0; k<m; k++) { /* loop over added rows */
1691     row  = im[k];
1692     brow = row/bs;
1693     if (row < 0) continue;
1694 #if defined(PETSC_USE_DEBUG)
1695     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1696 #endif
1697     rp   = aj + ai[brow];
1698     ap   = aa + bs2*ai[brow];
1699     rmax = imax[brow];
1700     nrow = ailen[brow];
1701     low  = 0;
1702     high = nrow;
1703     for (l=0; l<n; l++) { /* loop over added columns */
1704       if (in[l] < 0) continue;
1705 #if defined(PETSC_USE_DEBUG)
1706       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1707 #endif
1708       col = in[l]; bcol = col/bs;
1709       ridx = row % bs; cidx = col % bs;
1710       if (roworiented) {
1711         value = v[l + k*n];
1712       } else {
1713         value = v[k + l*m];
1714       }
1715       if (col <= lastcol) low = 0; else high = nrow;
1716       lastcol = col;
1717       while (high-low > 7) {
1718         t = (low+high)/2;
1719         if (rp[t] > bcol) high = t;
1720         else              low  = t;
1721       }
1722       for (i=low; i<high; i++) {
1723         if (rp[i] > bcol) break;
1724         if (rp[i] == bcol) {
1725           bap  = ap +  bs2*i + bs*cidx + ridx;
1726           if (is == ADD_VALUES) *bap += value;
1727           else                  *bap  = value;
1728           goto noinsert1;
1729         }
1730       }
1731       if (nonew == 1) goto noinsert1;
1732       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1733       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1734       N = nrow++ - 1; high++;
1735       /* shift up all the later entries in this row */
1736       for (ii=N; ii>=i; ii--) {
1737         rp[ii+1] = rp[ii];
1738         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1739       }
1740       if (N>=i) {
1741         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1742       }
1743       rp[i]                      = bcol;
1744       ap[bs2*i + bs*cidx + ridx] = value;
1745       a->nz++;
1746       noinsert1:;
1747       low = i;
1748     }
1749     ailen[brow] = nrow;
1750   }
1751   A->same_nonzero = PETSC_FALSE;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1758 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1759 {
1760   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1761   Mat            outA;
1762   PetscErrorCode ierr;
1763   PetscTruth     row_identity,col_identity;
1764 
1765   PetscFunctionBegin;
1766   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1767   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1768   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1769   if (!row_identity || !col_identity) {
1770     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1771   }
1772 
1773   outA          = inA;
1774   inA->factor   = FACTOR_LU;
1775 
1776   if (!a->diag) {
1777     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1778   }
1779 
1780   a->row        = row;
1781   a->col        = col;
1782   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1783   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1784 
1785   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1786   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1787   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1788 
1789   /*
1790       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1791       for ILU(0) factorization with natural ordering
1792   */
1793   if (inA->rmap.bs < 8) {
1794     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1795   } else {
1796     if (!a->solve_work) {
1797       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1798       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1799     }
1800   }
1801 
1802   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1803 
1804   PetscFunctionReturn(0);
1805 }
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1808 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1809 {
1810   static PetscTruth called = PETSC_FALSE;
1811   MPI_Comm          comm = A->comm;
1812   PetscErrorCode    ierr;
1813 
1814   PetscFunctionBegin;
1815   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1816   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1817   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 EXTERN_C_BEGIN
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1824 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1825 {
1826   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1827   PetscInt    i,nz,nbs;
1828 
1829   PetscFunctionBegin;
1830   nz  = baij->maxnz/baij->bs2;
1831   nbs = baij->nbs;
1832   for (i=0; i<nz; i++) {
1833     baij->j[i] = indices[i];
1834   }
1835   baij->nz = nz;
1836   for (i=0; i<nbs; i++) {
1837     baij->ilen[i] = baij->imax[i];
1838   }
1839 
1840   PetscFunctionReturn(0);
1841 }
1842 EXTERN_C_END
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1846 /*@
1847     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1848        in the matrix.
1849 
1850   Input Parameters:
1851 +  mat - the SeqBAIJ matrix
1852 -  indices - the column indices
1853 
1854   Level: advanced
1855 
1856   Notes:
1857     This can be called if you have precomputed the nonzero structure of the
1858   matrix and want to provide it to the matrix object to improve the performance
1859   of the MatSetValues() operation.
1860 
1861     You MUST have set the correct numbers of nonzeros per row in the call to
1862   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1863 
1864     MUST be called before any calls to MatSetValues();
1865 
1866 @*/
1867 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1868 {
1869   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1870 
1871   PetscFunctionBegin;
1872   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1873   PetscValidPointer(indices,2);
1874   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1875   if (f) {
1876     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1877   } else {
1878     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1879   }
1880   PetscFunctionReturn(0);
1881 }
1882 
1883 #undef __FUNCT__
1884 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1885 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1886 {
1887   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1888   PetscErrorCode ierr;
1889   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1890   PetscReal      atmp;
1891   PetscScalar    *x,zero = 0.0;
1892   MatScalar      *aa;
1893   PetscInt       ncols,brow,krow,kcol;
1894 
1895   PetscFunctionBegin;
1896   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1897   bs   = A->rmap.bs;
1898   aa   = a->a;
1899   ai   = a->i;
1900   aj   = a->j;
1901   mbs = a->mbs;
1902 
1903   ierr = VecSet(v,zero);CHKERRQ(ierr);
1904   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1905   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1906   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1907   for (i=0; i<mbs; i++) {
1908     ncols = ai[1] - ai[0]; ai++;
1909     brow  = bs*i;
1910     for (j=0; j<ncols; j++){
1911       /* bcol = bs*(*aj); */
1912       for (kcol=0; kcol<bs; kcol++){
1913         for (krow=0; krow<bs; krow++){
1914           atmp = PetscAbsScalar(*aa); aa++;
1915           row = brow + krow;    /* row index */
1916           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1917           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1918         }
1919       }
1920       aj++;
1921     }
1922   }
1923   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1924   PetscFunctionReturn(0);
1925 }
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatCopy_SeqBAIJ"
1929 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1930 {
1931   PetscErrorCode ierr;
1932 
1933   PetscFunctionBegin;
1934   /* If the two matrices have the same copy implementation, use fast copy. */
1935   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1936     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1937     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1938 
1939     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1940       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1941     }
1942     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1943   } else {
1944     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1951 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1952 {
1953   PetscErrorCode ierr;
1954 
1955   PetscFunctionBegin;
1956   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1962 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1963 {
1964   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1965   PetscFunctionBegin;
1966   *array = a->a;
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1972 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1973 {
1974   PetscFunctionBegin;
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 #include "petscblaslapack.h"
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1981 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1982 {
1983   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1984   PetscErrorCode ierr;
1985   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1986   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1987 
1988   PetscFunctionBegin;
1989   if (str == SAME_NONZERO_PATTERN) {
1990     PetscScalar alpha = a;
1991     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1992   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1993     if (y->xtoy && y->XtoY != X) {
1994       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1995       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1996     }
1997     if (!y->xtoy) { /* get xtoy */
1998       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1999       y->XtoY = X;
2000     }
2001     bs2 = bs*bs;
2002     for (i=0; i<x->nz; i++) {
2003       j = 0;
2004       while (j < bs2){
2005         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2006         j++;
2007       }
2008     }
2009     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2010   } else {
2011     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2018 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2019 {
2020   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2021   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2022   PetscScalar    *aa = a->a;
2023 
2024   PetscFunctionBegin;
2025   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 #undef __FUNCT__
2030 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2031 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2032 {
2033   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2034   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2035   PetscScalar    *aa = a->a;
2036 
2037   PetscFunctionBegin;
2038   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2039   PetscFunctionReturn(0);
2040 }
2041 
2042 
2043 /* -------------------------------------------------------------------*/
2044 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2045        MatGetRow_SeqBAIJ,
2046        MatRestoreRow_SeqBAIJ,
2047        MatMult_SeqBAIJ_N,
2048 /* 4*/ MatMultAdd_SeqBAIJ_N,
2049        MatMultTranspose_SeqBAIJ,
2050        MatMultTransposeAdd_SeqBAIJ,
2051        MatSolve_SeqBAIJ_N,
2052        0,
2053        0,
2054 /*10*/ 0,
2055        MatLUFactor_SeqBAIJ,
2056        0,
2057        0,
2058        MatTranspose_SeqBAIJ,
2059 /*15*/ MatGetInfo_SeqBAIJ,
2060        MatEqual_SeqBAIJ,
2061        MatGetDiagonal_SeqBAIJ,
2062        MatDiagonalScale_SeqBAIJ,
2063        MatNorm_SeqBAIJ,
2064 /*20*/ 0,
2065        MatAssemblyEnd_SeqBAIJ,
2066        0,
2067        MatSetOption_SeqBAIJ,
2068        MatZeroEntries_SeqBAIJ,
2069 /*25*/ MatZeroRows_SeqBAIJ,
2070        MatLUFactorSymbolic_SeqBAIJ,
2071        MatLUFactorNumeric_SeqBAIJ_N,
2072        MatCholeskyFactorSymbolic_SeqBAIJ,
2073        MatCholeskyFactorNumeric_SeqBAIJ_N,
2074 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2075        MatILUFactorSymbolic_SeqBAIJ,
2076        MatICCFactorSymbolic_SeqBAIJ,
2077        MatGetArray_SeqBAIJ,
2078        MatRestoreArray_SeqBAIJ,
2079 /*35*/ MatDuplicate_SeqBAIJ,
2080        0,
2081        0,
2082        MatILUFactor_SeqBAIJ,
2083        0,
2084 /*40*/ MatAXPY_SeqBAIJ,
2085        MatGetSubMatrices_SeqBAIJ,
2086        MatIncreaseOverlap_SeqBAIJ,
2087        MatGetValues_SeqBAIJ,
2088        MatCopy_SeqBAIJ,
2089 /*45*/ MatPrintHelp_SeqBAIJ,
2090        MatScale_SeqBAIJ,
2091        0,
2092        0,
2093        0,
2094 /*50*/ 0,
2095        MatGetRowIJ_SeqBAIJ,
2096        MatRestoreRowIJ_SeqBAIJ,
2097        0,
2098        0,
2099 /*55*/ 0,
2100        0,
2101        0,
2102        0,
2103        MatSetValuesBlocked_SeqBAIJ,
2104 /*60*/ MatGetSubMatrix_SeqBAIJ,
2105        MatDestroy_SeqBAIJ,
2106        MatView_SeqBAIJ,
2107        0,
2108        0,
2109 /*65*/ 0,
2110        0,
2111        0,
2112        0,
2113        0,
2114 /*70*/ MatGetRowMax_SeqBAIJ,
2115        MatConvert_Basic,
2116        0,
2117        0,
2118        0,
2119 /*75*/ 0,
2120        0,
2121        0,
2122        0,
2123        0,
2124 /*80*/ 0,
2125        0,
2126        0,
2127        0,
2128        MatLoad_SeqBAIJ,
2129 /*85*/ 0,
2130        0,
2131        0,
2132        0,
2133        0,
2134 /*90*/ 0,
2135        0,
2136        0,
2137        0,
2138        0,
2139 /*95*/ 0,
2140        0,
2141        0,
2142        0,
2143        0,
2144 /*100*/0,
2145        0,
2146        0,
2147        0,
2148        0,
2149 /*105*/0,
2150        MatRealPart_SeqBAIJ,
2151        MatImaginaryPart_SeqBAIJ
2152 };
2153 
2154 EXTERN_C_BEGIN
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2157 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2158 {
2159   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2160   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2161   PetscErrorCode ierr;
2162 
2163   PetscFunctionBegin;
2164   if (aij->nonew != 1) {
2165     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2166   }
2167 
2168   /* allocate space for values if not already there */
2169   if (!aij->saved_values) {
2170     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2171   }
2172 
2173   /* copy values over */
2174   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2175   PetscFunctionReturn(0);
2176 }
2177 EXTERN_C_END
2178 
2179 EXTERN_C_BEGIN
2180 #undef __FUNCT__
2181 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2182 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2183 {
2184   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2185   PetscErrorCode ierr;
2186   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2187 
2188   PetscFunctionBegin;
2189   if (aij->nonew != 1) {
2190     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2191   }
2192   if (!aij->saved_values) {
2193     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2194   }
2195 
2196   /* copy values over */
2197   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2198   PetscFunctionReturn(0);
2199 }
2200 EXTERN_C_END
2201 
2202 EXTERN_C_BEGIN
2203 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2204 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2205 EXTERN_C_END
2206 
2207 EXTERN_C_BEGIN
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2210 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2211 {
2212   Mat_SeqBAIJ    *b;
2213   PetscErrorCode ierr;
2214   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2215   PetscTruth     flg,skipallocation = PETSC_FALSE;
2216 
2217   PetscFunctionBegin;
2218 
2219   if (nz == MAT_SKIP_ALLOCATION) {
2220     skipallocation = PETSC_TRUE;
2221     nz             = 0;
2222   }
2223   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2224   if (nnz && newbs != bs) {
2225     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2226   }
2227   bs   = newbs;
2228 
2229   B->rmap.bs = B->cmap.bs = bs;
2230   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2231   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2232 
2233   B->preallocated = PETSC_TRUE;
2234 
2235   mbs  = B->rmap.n/bs;
2236   nbs  = B->cmap.n/bs;
2237   bs2  = bs*bs;
2238 
2239   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2240     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2241   }
2242 
2243   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2244   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2245   if (nnz) {
2246     for (i=0; i<mbs; i++) {
2247       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2248       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2249     }
2250   }
2251 
2252   b       = (Mat_SeqBAIJ*)B->data;
2253   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2254   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2255   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2256   if (!flg) {
2257     switch (bs) {
2258     case 1:
2259       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2260       B->ops->mult            = MatMult_SeqBAIJ_1;
2261       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2262       break;
2263     case 2:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2265       B->ops->mult            = MatMult_SeqBAIJ_2;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2268       break;
2269     case 3:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2271       B->ops->mult            = MatMult_SeqBAIJ_3;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2274       break;
2275     case 4:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2277       B->ops->mult            = MatMult_SeqBAIJ_4;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2279       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2280       break;
2281     case 5:
2282       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2283       B->ops->mult            = MatMult_SeqBAIJ_5;
2284       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2285       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2286       break;
2287     case 6:
2288       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2289       B->ops->mult            = MatMult_SeqBAIJ_6;
2290       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2291       break;
2292     case 7:
2293       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2294       B->ops->mult            = MatMult_SeqBAIJ_7;
2295       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2296       break;
2297     default:
2298       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2299       B->ops->mult            = MatMult_SeqBAIJ_N;
2300       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2301       break;
2302     }
2303   }
2304   B->rmap.bs      = bs;
2305   b->mbs     = mbs;
2306   b->nbs     = nbs;
2307   if (!skipallocation) {
2308     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2309     /* b->ilen will count nonzeros in each block row so far. */
2310     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2311     if (!nnz) {
2312       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2313       else if (nz <= 0)        nz = 1;
2314       for (i=0; i<mbs; i++) b->imax[i] = nz;
2315       nz = nz*mbs;
2316     } else {
2317       nz = 0;
2318       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2319     }
2320 
2321     /* allocate the matrix space */
2322     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2323     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2324     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2325     b->singlemalloc = PETSC_TRUE;
2326 
2327     b->i[0] = 0;
2328     for (i=1; i<mbs+1; i++) {
2329       b->i[i] = b->i[i-1] + b->imax[i-1];
2330     }
2331     b->freedata     = PETSC_TRUE;
2332   } else {
2333     b->freedata     = PETSC_FALSE;
2334   }
2335 
2336   B->rmap.bs          = bs;
2337   b->bs2              = bs2;
2338   b->mbs              = mbs;
2339   b->nz               = 0;
2340   b->maxnz            = nz*bs2;
2341   B->info.nz_unneeded = (PetscReal)b->maxnz;
2342   PetscFunctionReturn(0);
2343 }
2344 EXTERN_C_END
2345 
2346 /*MC
2347    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2348    block sparse compressed row format.
2349 
2350    Options Database Keys:
2351 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2352 
2353   Level: beginner
2354 
2355 .seealso: MatCreateSeqBAIJ()
2356 M*/
2357 
2358 EXTERN_C_BEGIN
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatCreate_SeqBAIJ"
2361 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2362 {
2363   PetscErrorCode ierr;
2364   PetscMPIInt    size;
2365   Mat_SeqBAIJ    *b;
2366 
2367   PetscFunctionBegin;
2368   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2369   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2370 
2371   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2372   B->data = (void*)b;
2373   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2374   B->factor           = 0;
2375   B->mapping          = 0;
2376   b->row              = 0;
2377   b->col              = 0;
2378   b->icol             = 0;
2379   b->reallocs         = 0;
2380   b->saved_values     = 0;
2381 #if defined(PETSC_USE_MAT_SINGLE)
2382   b->setvalueslen     = 0;
2383   b->setvaluescopy    = PETSC_NULL;
2384 #endif
2385 
2386   b->sorted           = PETSC_FALSE;
2387   b->roworiented      = PETSC_TRUE;
2388   b->nonew            = 0;
2389   b->diag             = 0;
2390   b->solve_work       = 0;
2391   b->mult_work        = 0;
2392   B->spptr            = 0;
2393   B->info.nz_unneeded = (PetscReal)b->maxnz;
2394   b->keepzeroedrows   = PETSC_FALSE;
2395   b->xtoy              = 0;
2396   b->XtoY              = 0;
2397   b->compressedrow.use     = PETSC_FALSE;
2398   b->compressedrow.nrows   = 0;
2399   b->compressedrow.i       = PETSC_NULL;
2400   b->compressedrow.rindex  = PETSC_NULL;
2401   b->compressedrow.checked = PETSC_FALSE;
2402   B->same_nonzero          = PETSC_FALSE;
2403 
2404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2405                                      "MatInvertBlockDiagonal_SeqBAIJ",
2406                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2407   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2408                                      "MatStoreValues_SeqBAIJ",
2409                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2411                                      "MatRetrieveValues_SeqBAIJ",
2412                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2414                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2415                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2417                                      "MatConvert_SeqBAIJ_SeqAIJ",
2418                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2420                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2421                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2423                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2424                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 EXTERN_C_END
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2431 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2432 {
2433   Mat            C;
2434   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2435   PetscErrorCode ierr;
2436   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2437 
2438   PetscFunctionBegin;
2439   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2440 
2441   *B = 0;
2442   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2443   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2444   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2445   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2446   c    = (Mat_SeqBAIJ*)C->data;
2447 
2448   C->rmap.N   = A->rmap.N;
2449   C->cmap.N   = A->cmap.N;
2450   C->rmap.bs  = A->rmap.bs;
2451   c->bs2 = a->bs2;
2452   c->mbs = a->mbs;
2453   c->nbs = a->nbs;
2454 
2455   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2456   for (i=0; i<mbs; i++) {
2457     c->imax[i] = a->imax[i];
2458     c->ilen[i] = a->ilen[i];
2459   }
2460 
2461   /* allocate the matrix space */
2462   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2463   c->singlemalloc = PETSC_TRUE;
2464   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2465   if (mbs > 0) {
2466     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2467     if (cpvalues == MAT_COPY_VALUES) {
2468       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2469     } else {
2470       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2471     }
2472   }
2473   c->sorted      = a->sorted;
2474   c->roworiented = a->roworiented;
2475   c->nonew       = a->nonew;
2476 
2477   if (a->diag) {
2478     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2479     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2480     for (i=0; i<mbs; i++) {
2481       c->diag[i] = a->diag[i];
2482     }
2483   } else c->diag        = 0;
2484   c->nz                 = a->nz;
2485   c->maxnz              = a->maxnz;
2486   c->solve_work         = 0;
2487   c->mult_work          = 0;
2488   c->freedata           = PETSC_TRUE;
2489   C->preallocated       = PETSC_TRUE;
2490   C->assembled          = PETSC_TRUE;
2491 
2492   c->compressedrow.use     = a->compressedrow.use;
2493   c->compressedrow.nrows   = a->compressedrow.nrows;
2494   c->compressedrow.checked = a->compressedrow.checked;
2495   if ( a->compressedrow.checked && a->compressedrow.use){
2496     i = a->compressedrow.nrows;
2497     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2498     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2499     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2500     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2501   } else {
2502     c->compressedrow.use    = PETSC_FALSE;
2503     c->compressedrow.i      = PETSC_NULL;
2504     c->compressedrow.rindex = PETSC_NULL;
2505   }
2506   C->same_nonzero = A->same_nonzero;
2507   *B = C;
2508   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatLoad_SeqBAIJ"
2514 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2515 {
2516   Mat_SeqBAIJ    *a;
2517   Mat            B;
2518   PetscErrorCode ierr;
2519   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2520   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2521   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2522   PetscInt       *masked,nmask,tmp,bs2,ishift;
2523   PetscMPIInt    size;
2524   int            fd;
2525   PetscScalar    *aa;
2526   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2527 
2528   PetscFunctionBegin;
2529   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2530   bs2  = bs*bs;
2531 
2532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2533   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2534   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2535   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2536   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2537   M = header[1]; N = header[2]; nz = header[3];
2538 
2539   if (header[3] < 0) {
2540     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2541   }
2542 
2543   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2544 
2545   /*
2546      This code adds extra rows to make sure the number of rows is
2547     divisible by the blocksize
2548   */
2549   mbs        = M/bs;
2550   extra_rows = bs - M + bs*(mbs);
2551   if (extra_rows == bs) extra_rows = 0;
2552   else                  mbs++;
2553   if (extra_rows) {
2554     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2555   }
2556 
2557   /* read in row lengths */
2558   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2559   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2560   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2561 
2562   /* read in column indices */
2563   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2564   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2565   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2566 
2567   /* loop over row lengths determining block row lengths */
2568   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2569   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2570   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2571   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2572   masked   = mask + mbs;
2573   rowcount = 0; nzcount = 0;
2574   for (i=0; i<mbs; i++) {
2575     nmask = 0;
2576     for (j=0; j<bs; j++) {
2577       kmax = rowlengths[rowcount];
2578       for (k=0; k<kmax; k++) {
2579         tmp = jj[nzcount++]/bs;
2580         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2581       }
2582       rowcount++;
2583     }
2584     browlengths[i] += nmask;
2585     /* zero out the mask elements we set */
2586     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2587   }
2588 
2589   /* create our matrix */
2590   ierr = MatCreate(comm,&B);
2591   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2592   ierr = MatSetType(B,type);CHKERRQ(ierr);
2593   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2594   a = (Mat_SeqBAIJ*)B->data;
2595 
2596   /* set matrix "i" values */
2597   a->i[0] = 0;
2598   for (i=1; i<= mbs; i++) {
2599     a->i[i]      = a->i[i-1] + browlengths[i-1];
2600     a->ilen[i-1] = browlengths[i-1];
2601   }
2602   a->nz         = 0;
2603   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2604 
2605   /* read in nonzero values */
2606   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2607   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2608   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2609 
2610   /* set "a" and "j" values into matrix */
2611   nzcount = 0; jcount = 0;
2612   for (i=0; i<mbs; i++) {
2613     nzcountb = nzcount;
2614     nmask    = 0;
2615     for (j=0; j<bs; j++) {
2616       kmax = rowlengths[i*bs+j];
2617       for (k=0; k<kmax; k++) {
2618         tmp = jj[nzcount++]/bs;
2619 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2620       }
2621     }
2622     /* sort the masked values */
2623     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2624 
2625     /* set "j" values into matrix */
2626     maskcount = 1;
2627     for (j=0; j<nmask; j++) {
2628       a->j[jcount++]  = masked[j];
2629       mask[masked[j]] = maskcount++;
2630     }
2631     /* set "a" values into matrix */
2632     ishift = bs2*a->i[i];
2633     for (j=0; j<bs; j++) {
2634       kmax = rowlengths[i*bs+j];
2635       for (k=0; k<kmax; k++) {
2636         tmp       = jj[nzcountb]/bs ;
2637         block     = mask[tmp] - 1;
2638         point     = jj[nzcountb] - bs*tmp;
2639         idx       = ishift + bs2*block + j + bs*point;
2640         a->a[idx] = (MatScalar)aa[nzcountb++];
2641       }
2642     }
2643     /* zero out the mask elements we set */
2644     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2645   }
2646   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2647 
2648   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2649   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2650   ierr = PetscFree(aa);CHKERRQ(ierr);
2651   ierr = PetscFree(jj);CHKERRQ(ierr);
2652   ierr = PetscFree(mask);CHKERRQ(ierr);
2653 
2654   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2655   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2656   ierr = MatView_Private(B);CHKERRQ(ierr);
2657 
2658   *A = B;
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatCreateSeqBAIJ"
2664 /*@C
2665    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2666    compressed row) format.  For good matrix assembly performance the
2667    user should preallocate the matrix storage by setting the parameter nz
2668    (or the array nnz).  By setting these parameters accurately, performance
2669    during matrix assembly can be increased by more than a factor of 50.
2670 
2671    Collective on MPI_Comm
2672 
2673    Input Parameters:
2674 +  comm - MPI communicator, set to PETSC_COMM_SELF
2675 .  bs - size of block
2676 .  m - number of rows
2677 .  n - number of columns
2678 .  nz - number of nonzero blocks  per block row (same for all rows)
2679 -  nnz - array containing the number of nonzero blocks in the various block rows
2680          (possibly different for each block row) or PETSC_NULL
2681 
2682    Output Parameter:
2683 .  A - the matrix
2684 
2685    Options Database Keys:
2686 .   -mat_no_unroll - uses code that does not unroll the loops in the
2687                      block calculations (much slower)
2688 .    -mat_block_size - size of the blocks to use
2689 
2690    Level: intermediate
2691 
2692    Notes:
2693    The number of rows and columns must be divisible by blocksize.
2694 
2695    If the nnz parameter is given then the nz parameter is ignored
2696 
2697    A nonzero block is any block that as 1 or more nonzeros in it
2698 
2699    The block AIJ format is fully compatible with standard Fortran 77
2700    storage.  That is, the stored row and column indices can begin at
2701    either one (as in Fortran) or zero.  See the users' manual for details.
2702 
2703    Specify the preallocated storage with either nz or nnz (not both).
2704    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2705    allocation.  For additional details, see the users manual chapter on
2706    matrices.
2707 
2708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2709 @*/
2710 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2711 {
2712   PetscErrorCode ierr;
2713 
2714   PetscFunctionBegin;
2715   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2716   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2717   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2718   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2724 /*@C
2725    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2726    per row in the matrix. For good matrix assembly performance the
2727    user should preallocate the matrix storage by setting the parameter nz
2728    (or the array nnz).  By setting these parameters accurately, performance
2729    during matrix assembly can be increased by more than a factor of 50.
2730 
2731    Collective on MPI_Comm
2732 
2733    Input Parameters:
2734 +  A - the matrix
2735 .  bs - size of block
2736 .  nz - number of block nonzeros per block row (same for all rows)
2737 -  nnz - array containing the number of block nonzeros in the various block rows
2738          (possibly different for each block row) or PETSC_NULL
2739 
2740    Options Database Keys:
2741 .   -mat_no_unroll - uses code that does not unroll the loops in the
2742                      block calculations (much slower)
2743 .    -mat_block_size - size of the blocks to use
2744 
2745    Level: intermediate
2746 
2747    Notes:
2748    If the nnz parameter is given then the nz parameter is ignored
2749 
2750    The block AIJ format is fully compatible with standard Fortran 77
2751    storage.  That is, the stored row and column indices can begin at
2752    either one (as in Fortran) or zero.  See the users' manual for details.
2753 
2754    Specify the preallocated storage with either nz or nnz (not both).
2755    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2756    allocation.  For additional details, see the users manual chapter on
2757    matrices.
2758 
2759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2760 @*/
2761 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2762 {
2763   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2764 
2765   PetscFunctionBegin;
2766   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2767   if (f) {
2768     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2769   }
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2775 /*@
2776      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2777               (upper triangular entries in CSR format) provided by the user.
2778 
2779      Collective on MPI_Comm
2780 
2781    Input Parameters:
2782 +  comm - must be an MPI communicator of size 1
2783 .  bs - size of block
2784 .  m - number of rows
2785 .  n - number of columns
2786 .  i - row indices
2787 .  j - column indices
2788 -  a - matrix values
2789 
2790    Output Parameter:
2791 .  mat - the matrix
2792 
2793    Level: intermediate
2794 
2795    Notes:
2796        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2797     once the matrix is destroyed
2798 
2799        You cannot set new nonzero locations into this matrix, that will generate an error.
2800 
2801        The i and j indices are 0 based
2802 
2803 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2804 
2805 @*/
2806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2807 {
2808   PetscErrorCode ierr;
2809   PetscInt       ii;
2810   Mat_SeqBAIJ    *baij;
2811 
2812   PetscFunctionBegin;
2813   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2814   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2815 
2816   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2817   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2818   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2819   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2820   baij = (Mat_SeqBAIJ*)(*mat)->data;
2821   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2822 
2823   baij->i = i;
2824   baij->j = j;
2825   baij->a = a;
2826   baij->singlemalloc = PETSC_FALSE;
2827   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2828   baij->freedata     = PETSC_FALSE;
2829 
2830   for (ii=0; ii<m; ii++) {
2831     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2832 #if defined(PETSC_USE_DEBUG)
2833     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2834 #endif
2835   }
2836 #if defined(PETSC_USE_DEBUG)
2837   for (ii=0; ii<baij->i[m]; ii++) {
2838     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2839     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2840   }
2841 #endif
2842 
2843   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847