xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2a6744eb01855f5aa328eb8fdf4b0d01e72ad151)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for direct calls from Fortran (Used in PETSc-fun3d)
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590   stepval = (n-1)*4;
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     rp   = aj + ai[row];
594     ap   = aa + 16*ai[row];
595     nrow = ailen[row];
596     low  = 0;
597     high = nrow;
598     for (l=0; l<n; l++) { /* loop over added columns */
599       col = in[l];
600       if (col <= lastcol) low = 0; else high = nrow;
601       lastcol = col;
602       value = v + k*(stepval+4 + l)*4;
603       while (high-low > 7) {
604         t = (low+high)/2;
605         if (rp[t] > col) high = t;
606         else             low  = t;
607       }
608       for (i=low; i<high; i++) {
609         if (rp[i] > col) break;
610         if (rp[i] == col) {
611           bap  = ap +  16*i;
612           for (ii=0; ii<4; ii++,value+=stepval) {
613             for (jj=ii; jj<16; jj+=4) {
614               bap[jj] += *value++;
615             }
616           }
617           goto noinsert2;
618         }
619       }
620       N = nrow++ - 1;
621       high++; /* added new column index thus must search to one higher than before */
622       /* shift up all the later entries in this row */
623       for (ii=N; ii>=i; ii--) {
624         rp[ii+1] = rp[ii];
625         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626       }
627       if (N >= i) {
628         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629       }
630       rp[i] = col;
631       bap   = ap +  16*i;
632       for (ii=0; ii<4; ii++,value+=stepval) {
633         for (jj=ii; jj<16; jj+=4) {
634           bap[jj] = *value++;
635         }
636       }
637       noinsert2:;
638       low = i;
639     }
640     ailen[row] = nrow;
641   }
642   PetscFunctionReturnVoid();
643 }
644 EXTERN_C_END
645 
646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
647 #define matsetvalues4_ MATSETVALUES4
648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649 #define matsetvalues4_ matsetvalues4
650 #endif
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatSetValues4_"
655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656 {
657   Mat         A = *AA;
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660   PetscInt    *ai=a->i,*ailen=a->ilen;
661   PetscInt    *aj=a->j,brow,bcol;
662   PetscInt    ridx,cidx,lastcol = -1;
663   MatScalar   *ap,value,*aa=a->a,*bap;
664 
665   PetscFunctionBegin;
666   for (k=0; k<m; k++) { /* loop over added rows */
667     row  = im[k]; brow = row/4;
668     rp   = aj + ai[brow];
669     ap   = aa + 16*ai[brow];
670     nrow = ailen[brow];
671     low  = 0;
672     high = nrow;
673     for (l=0; l<n; l++) { /* loop over added columns */
674       col = in[l]; bcol = col/4;
675       ridx = row % 4; cidx = col % 4;
676       value = v[l + k*n];
677       if (col <= lastcol) low = 0; else high = nrow;
678       lastcol = col;
679       while (high-low > 7) {
680         t = (low+high)/2;
681         if (rp[t] > bcol) high = t;
682         else              low  = t;
683       }
684       for (i=low; i<high; i++) {
685         if (rp[i] > bcol) break;
686         if (rp[i] == bcol) {
687           bap  = ap +  16*i + 4*cidx + ridx;
688           *bap += value;
689           goto noinsert1;
690         }
691       }
692       N = nrow++ - 1;
693       high++; /* added new column thus must search to one higher than before */
694       /* shift up all the later entries in this row */
695       for (ii=N; ii>=i; ii--) {
696         rp[ii+1] = rp[ii];
697         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698       }
699       if (N>=i) {
700         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701       }
702       rp[i]                    = bcol;
703       ap[16*i + 4*cidx + ridx] = value;
704       noinsert1:;
705       low = i;
706     }
707     ailen[brow] = nrow;
708   }
709   PetscFunctionReturnVoid();
710 }
711 EXTERN_C_END
712 
713 /*  UGLY, ugly, ugly
714    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717    converts the entries into single precision and then calls ..._MatScalar() to put them
718    into the single precision data structures.
719 */
720 #if defined(PETSC_USE_MAT_SINGLE)
721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722 #else
723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724 #endif
725 
726 #define CHUNKSIZE  10
727 
728 /*
729      Checks for missing diagonals
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734 {
735   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
736   PetscErrorCode ierr;
737   PetscInt       *diag,*jj = a->j,i;
738 
739   PetscFunctionBegin;
740   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
741   diag = a->diag;
742   for (i=0; i<a->mbs; i++) {
743     if (jj[diag[i]] != i) {
744       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745     }
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753 {
754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
755   PetscErrorCode ierr;
756   PetscInt       i,j,*diag,m = a->mbs;
757 
758   PetscFunctionBegin;
759   if (a->diag) PetscFunctionReturn(0);
760 
761   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
762   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
763   for (i=0; i<m; i++) {
764     diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         diag[i] = j;
768         break;
769       }
770     }
771   }
772   a->diag = diag;
773   PetscFunctionReturn(0);
774 }
775 
776 
777 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
781 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
782 {
783   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
784   PetscErrorCode ierr;
785   PetscInt       n = a->mbs,i;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
792   } else if (oshift == 1) {
793     /* temporarily add 1 to i and j indices */
794     PetscInt nz = a->i[n];
795     for (i=0; i<nz; i++) a->j[i]++;
796     for (i=0; i<n+1; i++) a->i[i]++;
797     *ia = a->i; *ja = a->j;
798   } else {
799     *ia = a->i; *ja = a->j;
800   }
801 
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
807 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
808 {
809   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
810   PetscErrorCode ierr;
811   PetscInt       i,n = a->mbs;
812 
813   PetscFunctionBegin;
814   if (!ia) PetscFunctionReturn(0);
815   if (symmetric) {
816     ierr = PetscFree(*ia);CHKERRQ(ierr);
817     ierr = PetscFree(*ja);CHKERRQ(ierr);
818   } else if (oshift == 1) {
819     PetscInt nz = a->i[n]-1;
820     for (i=0; i<nz; i++) a->j[i]--;
821     for (i=0; i<n+1; i++) a->i[i]--;
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNCT__
827 #define __FUNCT__ "MatDestroy_SeqBAIJ"
828 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
829 {
830   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
831   PetscErrorCode ierr;
832 
833   PetscFunctionBegin;
834 #if defined(PETSC_USE_LOG)
835   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
836 #endif
837   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
838   if (a->row) {
839     ierr = ISDestroy(a->row);CHKERRQ(ierr);
840   }
841   if (a->col) {
842     ierr = ISDestroy(a->col);CHKERRQ(ierr);
843   }
844   ierr = PetscFree(a->diag);CHKERRQ(ierr);
845   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
846   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
847   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
848   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
849   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
850   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
851 #if defined(PETSC_USE_MAT_SINGLE)
852   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
853 #endif
854   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
855   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
856 
857   ierr = PetscFree(a);CHKERRQ(ierr);
858 
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetOption_SeqBAIJ"
871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872 {
873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   switch (op) {
878   case MAT_ROW_ORIENTED:
879     a->roworiented    = PETSC_TRUE;
880     break;
881   case MAT_COLUMN_ORIENTED:
882     a->roworiented    = PETSC_FALSE;
883     break;
884   case MAT_COLUMNS_SORTED:
885     a->sorted         = PETSC_TRUE;
886     break;
887   case MAT_COLUMNS_UNSORTED:
888     a->sorted         = PETSC_FALSE;
889     break;
890   case MAT_KEEP_ZEROED_ROWS:
891     a->keepzeroedrows = PETSC_TRUE;
892     break;
893   case MAT_NO_NEW_NONZERO_LOCATIONS:
894     a->nonew          = 1;
895     break;
896   case MAT_NEW_NONZERO_LOCATION_ERR:
897     a->nonew          = -1;
898     break;
899   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900     a->nonew          = -2;
901     break;
902   case MAT_YES_NEW_NONZERO_LOCATIONS:
903     a->nonew          = 0;
904     break;
905   case MAT_ROWS_SORTED:
906   case MAT_ROWS_UNSORTED:
907   case MAT_YES_NEW_DIAGONALS:
908   case MAT_IGNORE_OFF_PROC_ENTRIES:
909   case MAT_USE_HASH_TABLE:
910     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
911     break;
912   case MAT_NO_NEW_DIAGONALS:
913     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914   case MAT_SYMMETRIC:
915   case MAT_STRUCTURALLY_SYMMETRIC:
916   case MAT_NOT_SYMMETRIC:
917   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918   case MAT_HERMITIAN:
919   case MAT_NOT_HERMITIAN:
920   case MAT_SYMMETRY_ETERNAL:
921   case MAT_NOT_SYMMETRY_ETERNAL:
922     break;
923   default:
924     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatGetRow_SeqBAIJ"
931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
932 {
933   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
934   PetscErrorCode ierr;
935   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
936   MatScalar      *aa,*aa_i;
937   PetscScalar    *v_i;
938 
939   PetscFunctionBegin;
940   bs  = A->rmap.bs;
941   ai  = a->i;
942   aj  = a->j;
943   aa  = a->a;
944   bs2 = a->bs2;
945 
946   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
947 
948   bn  = row/bs;   /* Block number */
949   bp  = row % bs; /* Block Position */
950   M   = ai[bn+1] - ai[bn];
951   *nz = bs*M;
952 
953   if (v) {
954     *v = 0;
955     if (*nz) {
956       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
957       for (i=0; i<M; i++) { /* for each block in the block row */
958         v_i  = *v + i*bs;
959         aa_i = aa + bs2*(ai[bn] + i);
960         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
961       }
962     }
963   }
964 
965   if (idx) {
966     *idx = 0;
967     if (*nz) {
968       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
969       for (i=0; i<M; i++) { /* for each block in the block row */
970         idx_i = *idx + i*bs;
971         itmp  = bs*aj[ai[bn] + i];
972         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
973       }
974     }
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
982 {
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
987   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatTranspose_SeqBAIJ"
993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
994 {
995   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
996   Mat            C;
997   PetscErrorCode ierr;
998   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
999   PetscInt       *rows,*cols,bs2=a->bs2;
1000   PetscScalar    *array;
1001 
1002   PetscFunctionBegin;
1003   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1004   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1005   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1006 
1007 #if defined(PETSC_USE_MAT_SINGLE)
1008   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1009   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1010 #else
1011   array = a->a;
1012 #endif
1013 
1014   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1015   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1016   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1017   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1018   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1019   ierr = PetscFree(col);CHKERRQ(ierr);
1020   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1021   cols = rows + bs;
1022   for (i=0; i<mbs; i++) {
1023     cols[0] = i*bs;
1024     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1025     len = ai[i+1] - ai[i];
1026     for (j=0; j<len; j++) {
1027       rows[0] = (*aj++)*bs;
1028       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1029       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1030       array += bs2;
1031     }
1032   }
1033   ierr = PetscFree(rows);CHKERRQ(ierr);
1034 #if defined(PETSC_USE_MAT_SINGLE)
1035   ierr = PetscFree(array);
1036 #endif
1037 
1038   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040 
1041   if (B) {
1042     *B = C;
1043   } else {
1044     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1045   }
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1051 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1052 {
1053   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1054   PetscErrorCode ierr;
1055   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1056   int            fd;
1057   PetscScalar    *aa;
1058   FILE           *file;
1059 
1060   PetscFunctionBegin;
1061   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1062   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1063   col_lens[0] = MAT_FILE_COOKIE;
1064 
1065   col_lens[1] = A->rmap.N;
1066   col_lens[2] = A->cmap.n;
1067   col_lens[3] = a->nz*bs2;
1068 
1069   /* store lengths of each row and write (including header) to file */
1070   count = 0;
1071   for (i=0; i<a->mbs; i++) {
1072     for (j=0; j<bs; j++) {
1073       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1074     }
1075   }
1076   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1077   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1078 
1079   /* store column indices (zero start index) */
1080   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1081   count = 0;
1082   for (i=0; i<a->mbs; i++) {
1083     for (j=0; j<bs; j++) {
1084       for (k=a->i[i]; k<a->i[i+1]; k++) {
1085         for (l=0; l<bs; l++) {
1086           jj[count++] = bs*a->j[k] + l;
1087         }
1088       }
1089     }
1090   }
1091   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1092   ierr = PetscFree(jj);CHKERRQ(ierr);
1093 
1094   /* store nonzero values */
1095   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1096   count = 0;
1097   for (i=0; i<a->mbs; i++) {
1098     for (j=0; j<bs; j++) {
1099       for (k=a->i[i]; k<a->i[i+1]; k++) {
1100         for (l=0; l<bs; l++) {
1101           aa[count++] = a->a[bs2*k + l*bs + j];
1102         }
1103       }
1104     }
1105   }
1106   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1107   ierr = PetscFree(aa);CHKERRQ(ierr);
1108 
1109   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1110   if (file) {
1111     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1112   }
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1118 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1119 {
1120   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1121   PetscErrorCode    ierr;
1122   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1123   PetscViewerFormat format;
1124 
1125   PetscFunctionBegin;
1126   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1127   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1128     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1129   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1130     Mat aij;
1131     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1132     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1133     ierr = MatDestroy(aij);CHKERRQ(ierr);
1134   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1135      PetscFunctionReturn(0);
1136   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1137     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1138     for (i=0; i<a->mbs; i++) {
1139       for (j=0; j<bs; j++) {
1140         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1141         for (k=a->i[i]; k<a->i[i+1]; k++) {
1142           for (l=0; l<bs; l++) {
1143 #if defined(PETSC_USE_COMPLEX)
1144             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1145               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1146                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1147             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1149                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1151               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1152             }
1153 #else
1154             if (a->a[bs2*k + l*bs + j] != 0.0) {
1155               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1156             }
1157 #endif
1158           }
1159         }
1160         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1161       }
1162     }
1163     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1164   } else {
1165     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1166     for (i=0; i<a->mbs; i++) {
1167       for (j=0; j<bs; j++) {
1168         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1169         for (k=a->i[i]; k<a->i[i+1]; k++) {
1170           for (l=0; l<bs; l++) {
1171 #if defined(PETSC_USE_COMPLEX)
1172             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1173               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1174                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1175             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1176               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1177                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1178             } else {
1179               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1180             }
1181 #else
1182             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1183 #endif
1184           }
1185         }
1186         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1187       }
1188     }
1189     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1190   }
1191   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1197 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1198 {
1199   Mat            A = (Mat) Aa;
1200   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1201   PetscErrorCode ierr;
1202   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1203   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1204   MatScalar      *aa;
1205   PetscViewer    viewer;
1206 
1207   PetscFunctionBegin;
1208 
1209   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1210   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1211 
1212   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1213 
1214   /* loop over matrix elements drawing boxes */
1215   color = PETSC_DRAW_BLUE;
1216   for (i=0,row=0; i<mbs; i++,row+=bs) {
1217     for (j=a->i[i]; j<a->i[i+1]; j++) {
1218       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1219       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1220       aa = a->a + j*bs2;
1221       for (k=0; k<bs; k++) {
1222         for (l=0; l<bs; l++) {
1223           if (PetscRealPart(*aa++) >=  0.) continue;
1224           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1225         }
1226       }
1227     }
1228   }
1229   color = PETSC_DRAW_CYAN;
1230   for (i=0,row=0; i<mbs; i++,row+=bs) {
1231     for (j=a->i[i]; j<a->i[i+1]; j++) {
1232       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1233       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1234       aa = a->a + j*bs2;
1235       for (k=0; k<bs; k++) {
1236         for (l=0; l<bs; l++) {
1237           if (PetscRealPart(*aa++) != 0.) continue;
1238           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1239         }
1240       }
1241     }
1242   }
1243 
1244   color = PETSC_DRAW_RED;
1245   for (i=0,row=0; i<mbs; i++,row+=bs) {
1246     for (j=a->i[i]; j<a->i[i+1]; j++) {
1247       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1248       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1249       aa = a->a + j*bs2;
1250       for (k=0; k<bs; k++) {
1251         for (l=0; l<bs; l++) {
1252           if (PetscRealPart(*aa++) <= 0.) continue;
1253           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1254         }
1255       }
1256     }
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1263 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1264 {
1265   PetscErrorCode ierr;
1266   PetscReal      xl,yl,xr,yr,w,h;
1267   PetscDraw      draw;
1268   PetscTruth     isnull;
1269 
1270   PetscFunctionBegin;
1271 
1272   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1273   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1274 
1275   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1276   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1277   xr += w;    yr += h;  xl = -w;     yl = -h;
1278   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1279   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1280   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatView_SeqBAIJ"
1286 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1287 {
1288   PetscErrorCode ierr;
1289   PetscTruth     iascii,isbinary,isdraw;
1290 
1291   PetscFunctionBegin;
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1295   if (iascii){
1296     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1297   } else if (isbinary) {
1298     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1299   } else if (isdraw) {
1300     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1301   } else {
1302     Mat B;
1303     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1304     ierr = MatView(B,viewer);CHKERRQ(ierr);
1305     ierr = MatDestroy(B);CHKERRQ(ierr);
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1313 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1314 {
1315   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1316   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1317   PetscInt    *ai = a->i,*ailen = a->ilen;
1318   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1319   MatScalar   *ap,*aa = a->a,zero = 0.0;
1320 
1321   PetscFunctionBegin;
1322   for (k=0; k<m; k++) { /* loop over rows */
1323     row  = im[k]; brow = row/bs;
1324     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1325     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1326     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1327     nrow = ailen[brow];
1328     for (l=0; l<n; l++) { /* loop over columns */
1329       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1330       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1331       col  = in[l] ;
1332       bcol = col/bs;
1333       cidx = col%bs;
1334       ridx = row%bs;
1335       high = nrow;
1336       low  = 0; /* assume unsorted */
1337       while (high-low > 5) {
1338         t = (low+high)/2;
1339         if (rp[t] > bcol) high = t;
1340         else             low  = t;
1341       }
1342       for (i=low; i<high; i++) {
1343         if (rp[i] > bcol) break;
1344         if (rp[i] == bcol) {
1345           *v++ = ap[bs2*i+bs*cidx+ridx];
1346           goto finished;
1347         }
1348       }
1349       *v++ = zero;
1350       finished:;
1351     }
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #if defined(PETSC_USE_MAT_SINGLE)
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1359 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1360 {
1361   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1362   PetscErrorCode ierr;
1363   PetscInt       i,N = m*n*b->bs2;
1364   MatScalar      *vsingle;
1365 
1366   PetscFunctionBegin;
1367   if (N > b->setvalueslen) {
1368     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1369     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1370     b->setvalueslen  = N;
1371   }
1372   vsingle = b->setvaluescopy;
1373   for (i=0; i<N; i++) {
1374     vsingle[i] = v[i];
1375   }
1376   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 #endif
1380 
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1384 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1385 {
1386   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1387   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1388   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1389   PetscErrorCode  ierr;
1390   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1391   PetscTruth      roworiented=a->roworiented;
1392   const MatScalar *value = v;
1393   MatScalar       *ap,*aa = a->a,*bap;
1394 
1395   PetscFunctionBegin;
1396   if (roworiented) {
1397     stepval = (n-1)*bs;
1398   } else {
1399     stepval = (m-1)*bs;
1400   }
1401   for (k=0; k<m; k++) { /* loop over added rows */
1402     row  = im[k];
1403     if (row < 0) continue;
1404 #if defined(PETSC_USE_DEBUG)
1405     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1406 #endif
1407     rp   = aj + ai[row];
1408     ap   = aa + bs2*ai[row];
1409     rmax = imax[row];
1410     nrow = ailen[row];
1411     low  = 0;
1412     high = nrow;
1413     for (l=0; l<n; l++) { /* loop over added columns */
1414       if (in[l] < 0) continue;
1415 #if defined(PETSC_USE_DEBUG)
1416       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1417 #endif
1418       col = in[l];
1419       if (roworiented) {
1420         value = v + k*(stepval+bs)*bs + l*bs;
1421       } else {
1422         value = v + l*(stepval+bs)*bs + k*bs;
1423       }
1424       if (col <= lastcol) low = 0; else high = nrow;
1425       lastcol = col;
1426       while (high-low > 7) {
1427         t = (low+high)/2;
1428         if (rp[t] > col) high = t;
1429         else             low  = t;
1430       }
1431       for (i=low; i<high; i++) {
1432         if (rp[i] > col) break;
1433         if (rp[i] == col) {
1434           bap  = ap +  bs2*i;
1435           if (roworiented) {
1436             if (is == ADD_VALUES) {
1437               for (ii=0; ii<bs; ii++,value+=stepval) {
1438                 for (jj=ii; jj<bs2; jj+=bs) {
1439                   bap[jj] += *value++;
1440                 }
1441               }
1442             } else {
1443               for (ii=0; ii<bs; ii++,value+=stepval) {
1444                 for (jj=ii; jj<bs2; jj+=bs) {
1445                   bap[jj] = *value++;
1446                 }
1447               }
1448             }
1449           } else {
1450             if (is == ADD_VALUES) {
1451               for (ii=0; ii<bs; ii++,value+=stepval) {
1452                 for (jj=0; jj<bs; jj++) {
1453                   *bap++ += *value++;
1454                 }
1455               }
1456             } else {
1457               for (ii=0; ii<bs; ii++,value+=stepval) {
1458                 for (jj=0; jj<bs; jj++) {
1459                   *bap++  = *value++;
1460                 }
1461               }
1462             }
1463           }
1464           goto noinsert2;
1465         }
1466       }
1467       if (nonew == 1) goto noinsert2;
1468       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1469       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew);
1470       N = nrow++ - 1; high++;
1471       /* shift up all the later entries in this row */
1472       for (ii=N; ii>=i; ii--) {
1473         rp[ii+1] = rp[ii];
1474         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1475       }
1476       if (N >= i) {
1477         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1478       }
1479       rp[i] = col;
1480       bap   = ap +  bs2*i;
1481       if (roworiented) {
1482         for (ii=0; ii<bs; ii++,value+=stepval) {
1483           for (jj=ii; jj<bs2; jj+=bs) {
1484             bap[jj] = *value++;
1485           }
1486         }
1487       } else {
1488         for (ii=0; ii<bs; ii++,value+=stepval) {
1489           for (jj=0; jj<bs; jj++) {
1490             *bap++  = *value++;
1491           }
1492         }
1493       }
1494       noinsert2:;
1495       low = i;
1496     }
1497     ailen[row] = nrow;
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1504 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1505 {
1506   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1507   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1508   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1509   PetscErrorCode ierr;
1510   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1511   MatScalar      *aa = a->a,*ap;
1512   PetscReal      ratio=0.6;
1513 
1514   PetscFunctionBegin;
1515   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1516 
1517   if (m) rmax = ailen[0];
1518   for (i=1; i<mbs; i++) {
1519     /* move each row back by the amount of empty slots (fshift) before it*/
1520     fshift += imax[i-1] - ailen[i-1];
1521     rmax   = PetscMax(rmax,ailen[i]);
1522     if (fshift) {
1523       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1524       N = ailen[i];
1525       for (j=0; j<N; j++) {
1526         ip[j-fshift] = ip[j];
1527         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1528       }
1529     }
1530     ai[i] = ai[i-1] + ailen[i-1];
1531   }
1532   if (mbs) {
1533     fshift += imax[mbs-1] - ailen[mbs-1];
1534     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1535   }
1536   /* reset ilen and imax for each row */
1537   for (i=0; i<mbs; i++) {
1538     ailen[i] = imax[i] = ai[i+1] - ai[i];
1539   }
1540   a->nz = ai[mbs];
1541 
1542   /* diagonals may have moved, so kill the diagonal pointers */
1543   a->idiagvalid = PETSC_FALSE;
1544   if (fshift && a->diag) {
1545     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1546     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1547     a->diag = 0;
1548   }
1549   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1550   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1551   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1552   a->reallocs          = 0;
1553   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1554 
1555   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1556   if (a->compressedrow.use){
1557     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1558   }
1559 
1560   A->same_nonzero = PETSC_TRUE;
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 /*
1565    This function returns an array of flags which indicate the locations of contiguous
1566    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1567    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1568    Assume: sizes should be long enough to hold all the values.
1569 */
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1573 {
1574   PetscInt   i,j,k,row;
1575   PetscTruth flg;
1576 
1577   PetscFunctionBegin;
1578   for (i=0,j=0; i<n; j++) {
1579     row = idx[i];
1580     if (row%bs!=0) { /* Not the begining of a block */
1581       sizes[j] = 1;
1582       i++;
1583     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1584       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1585       i++;
1586     } else { /* Begining of the block, so check if the complete block exists */
1587       flg = PETSC_TRUE;
1588       for (k=1; k<bs; k++) {
1589         if (row+k != idx[i+k]) { /* break in the block */
1590           flg = PETSC_FALSE;
1591           break;
1592         }
1593       }
1594       if (flg) { /* No break in the bs */
1595         sizes[j] = bs;
1596         i+= bs;
1597       } else {
1598         sizes[j] = 1;
1599         i++;
1600       }
1601     }
1602   }
1603   *bs_max = j;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1610 {
1611   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1612   PetscErrorCode ierr;
1613   PetscInt       i,j,k,count,*rows;
1614   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1615   PetscScalar    zero = 0.0;
1616   MatScalar      *aa;
1617 
1618   PetscFunctionBegin;
1619   /* Make a copy of the IS and  sort it */
1620   /* allocate memory for rows,sizes */
1621   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1622   sizes = rows + is_n;
1623 
1624   /* copy IS values to rows, and sort them */
1625   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1626   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1627   if (baij->keepzeroedrows) {
1628     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1629     bs_max = is_n;
1630     A->same_nonzero = PETSC_TRUE;
1631   } else {
1632     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1633     A->same_nonzero = PETSC_FALSE;
1634   }
1635 
1636   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1637     row   = rows[j];
1638     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1639     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1640     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1641     if (sizes[i] == bs && !baij->keepzeroedrows) {
1642       if (diag != 0.0) {
1643         if (baij->ilen[row/bs] > 0) {
1644           baij->ilen[row/bs]       = 1;
1645           baij->j[baij->i[row/bs]] = row/bs;
1646           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1647         }
1648         /* Now insert all the diagonal values for this bs */
1649         for (k=0; k<bs; k++) {
1650           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1651         }
1652       } else { /* (diag == 0.0) */
1653         baij->ilen[row/bs] = 0;
1654       } /* end (diag == 0.0) */
1655     } else { /* (sizes[i] != bs) */
1656 #if defined (PETSC_USE_DEBUG)
1657       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1658 #endif
1659       for (k=0; k<count; k++) {
1660         aa[0] =  zero;
1661         aa    += bs;
1662       }
1663       if (diag != 0.0) {
1664         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665       }
1666     }
1667   }
1668 
1669   ierr = PetscFree(rows);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1677 {
1678   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1679   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1680   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1681   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1682   PetscErrorCode ierr;
1683   PetscInt       ridx,cidx,bs2=a->bs2;
1684   PetscTruth     roworiented=a->roworiented;
1685   MatScalar      *ap,value,*aa=a->a,*bap;
1686 
1687   PetscFunctionBegin;
1688   for (k=0; k<m; k++) { /* loop over added rows */
1689     row  = im[k];
1690     brow = row/bs;
1691     if (row < 0) continue;
1692 #if defined(PETSC_USE_DEBUG)
1693     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1694 #endif
1695     rp   = aj + ai[brow];
1696     ap   = aa + bs2*ai[brow];
1697     rmax = imax[brow];
1698     nrow = ailen[brow];
1699     low  = 0;
1700     high = nrow;
1701     for (l=0; l<n; l++) { /* loop over added columns */
1702       if (in[l] < 0) continue;
1703 #if defined(PETSC_USE_DEBUG)
1704       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1705 #endif
1706       col = in[l]; bcol = col/bs;
1707       ridx = row % bs; cidx = col % bs;
1708       if (roworiented) {
1709         value = v[l + k*n];
1710       } else {
1711         value = v[k + l*m];
1712       }
1713       if (col <= lastcol) low = 0; else high = nrow;
1714       lastcol = col;
1715       while (high-low > 7) {
1716         t = (low+high)/2;
1717         if (rp[t] > bcol) high = t;
1718         else              low  = t;
1719       }
1720       for (i=low; i<high; i++) {
1721         if (rp[i] > bcol) break;
1722         if (rp[i] == bcol) {
1723           bap  = ap +  bs2*i + bs*cidx + ridx;
1724           if (is == ADD_VALUES) *bap += value;
1725           else                  *bap  = value;
1726           goto noinsert1;
1727         }
1728       }
1729       if (nonew == 1) goto noinsert1;
1730       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1731       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew);
1732       N = nrow++ - 1; high++;
1733       /* shift up all the later entries in this row */
1734       for (ii=N; ii>=i; ii--) {
1735         rp[ii+1] = rp[ii];
1736         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1737       }
1738       if (N>=i) {
1739         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1740       }
1741       rp[i]                      = bcol;
1742       ap[bs2*i + bs*cidx + ridx] = value;
1743       a->nz++;
1744       noinsert1:;
1745       low = i;
1746     }
1747     ailen[brow] = nrow;
1748   }
1749   A->same_nonzero = PETSC_FALSE;
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1756 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1757 {
1758   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1759   Mat            outA;
1760   PetscErrorCode ierr;
1761   PetscTruth     row_identity,col_identity;
1762 
1763   PetscFunctionBegin;
1764   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1765   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1766   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1767   if (!row_identity || !col_identity) {
1768     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1769   }
1770 
1771   outA          = inA;
1772   inA->factor   = FACTOR_LU;
1773 
1774   if (!a->diag) {
1775     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1776   }
1777 
1778   a->row        = row;
1779   a->col        = col;
1780   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1781   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1782 
1783   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1784   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1785   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1786 
1787   /*
1788       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1789       for ILU(0) factorization with natural ordering
1790   */
1791   if (inA->rmap.bs < 8) {
1792     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1793   } else {
1794     if (!a->solve_work) {
1795       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1796       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1797     }
1798   }
1799 
1800   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1801 
1802   PetscFunctionReturn(0);
1803 }
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1806 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1807 {
1808   static PetscTruth called = PETSC_FALSE;
1809   MPI_Comm          comm = A->comm;
1810   PetscErrorCode    ierr;
1811 
1812   PetscFunctionBegin;
1813   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1814   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1815   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 EXTERN_C_BEGIN
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1822 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1823 {
1824   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1825   PetscInt    i,nz,nbs;
1826 
1827   PetscFunctionBegin;
1828   nz  = baij->maxnz/baij->bs2;
1829   nbs = baij->nbs;
1830   for (i=0; i<nz; i++) {
1831     baij->j[i] = indices[i];
1832   }
1833   baij->nz = nz;
1834   for (i=0; i<nbs; i++) {
1835     baij->ilen[i] = baij->imax[i];
1836   }
1837 
1838   PetscFunctionReturn(0);
1839 }
1840 EXTERN_C_END
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1844 /*@
1845     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1846        in the matrix.
1847 
1848   Input Parameters:
1849 +  mat - the SeqBAIJ matrix
1850 -  indices - the column indices
1851 
1852   Level: advanced
1853 
1854   Notes:
1855     This can be called if you have precomputed the nonzero structure of the
1856   matrix and want to provide it to the matrix object to improve the performance
1857   of the MatSetValues() operation.
1858 
1859     You MUST have set the correct numbers of nonzeros per row in the call to
1860   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1861 
1862     MUST be called before any calls to MatSetValues();
1863 
1864 @*/
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1866 {
1867   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1868 
1869   PetscFunctionBegin;
1870   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1871   PetscValidPointer(indices,2);
1872   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1873   if (f) {
1874     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1875   } else {
1876     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1877   }
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1883 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1884 {
1885   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1886   PetscErrorCode ierr;
1887   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1888   PetscReal      atmp;
1889   PetscScalar    *x,zero = 0.0;
1890   MatScalar      *aa;
1891   PetscInt       ncols,brow,krow,kcol;
1892 
1893   PetscFunctionBegin;
1894   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1895   bs   = A->rmap.bs;
1896   aa   = a->a;
1897   ai   = a->i;
1898   aj   = a->j;
1899   mbs = a->mbs;
1900 
1901   ierr = VecSet(v,zero);CHKERRQ(ierr);
1902   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1903   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1904   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1905   for (i=0; i<mbs; i++) {
1906     ncols = ai[1] - ai[0]; ai++;
1907     brow  = bs*i;
1908     for (j=0; j<ncols; j++){
1909       /* bcol = bs*(*aj); */
1910       for (kcol=0; kcol<bs; kcol++){
1911         for (krow=0; krow<bs; krow++){
1912           atmp = PetscAbsScalar(*aa); aa++;
1913           row = brow + krow;    /* row index */
1914           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1915           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1916         }
1917       }
1918       aj++;
1919     }
1920   }
1921   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatCopy_SeqBAIJ"
1927 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   /* If the two matrices have the same copy implementation, use fast copy. */
1933   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1934     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1935     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1936 
1937     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1938       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1939     }
1940     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1941   } else {
1942     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1949 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1950 {
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1960 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1961 {
1962   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1963   PetscFunctionBegin;
1964   *array = a->a;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1970 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971 {
1972   PetscFunctionBegin;
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #include "petscblaslapack.h"
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1979 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1980 {
1981   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1982   PetscErrorCode ierr;
1983   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1984   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1985 
1986   PetscFunctionBegin;
1987   if (str == SAME_NONZERO_PATTERN) {
1988     PetscScalar alpha = a;
1989     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1990   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1991     if (y->xtoy && y->XtoY != X) {
1992       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1993       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1994     }
1995     if (!y->xtoy) { /* get xtoy */
1996       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1997       y->XtoY = X;
1998     }
1999     bs2 = bs*bs;
2000     for (i=0; i<x->nz; i++) {
2001       j = 0;
2002       while (j < bs2){
2003         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2004         j++;
2005       }
2006     }
2007     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2008   } else {
2009     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2010   }
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2016 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2017 {
2018   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2019   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2020   PetscScalar    *aa = a->a;
2021 
2022   PetscFunctionBegin;
2023   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2029 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2030 {
2031   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2032   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2033   PetscScalar    *aa = a->a;
2034 
2035   PetscFunctionBegin;
2036   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 
2041 /* -------------------------------------------------------------------*/
2042 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2043        MatGetRow_SeqBAIJ,
2044        MatRestoreRow_SeqBAIJ,
2045        MatMult_SeqBAIJ_N,
2046 /* 4*/ MatMultAdd_SeqBAIJ_N,
2047        MatMultTranspose_SeqBAIJ,
2048        MatMultTransposeAdd_SeqBAIJ,
2049        MatSolve_SeqBAIJ_N,
2050        0,
2051        0,
2052 /*10*/ 0,
2053        MatLUFactor_SeqBAIJ,
2054        0,
2055        0,
2056        MatTranspose_SeqBAIJ,
2057 /*15*/ MatGetInfo_SeqBAIJ,
2058        MatEqual_SeqBAIJ,
2059        MatGetDiagonal_SeqBAIJ,
2060        MatDiagonalScale_SeqBAIJ,
2061        MatNorm_SeqBAIJ,
2062 /*20*/ 0,
2063        MatAssemblyEnd_SeqBAIJ,
2064        0,
2065        MatSetOption_SeqBAIJ,
2066        MatZeroEntries_SeqBAIJ,
2067 /*25*/ MatZeroRows_SeqBAIJ,
2068        MatLUFactorSymbolic_SeqBAIJ,
2069        MatLUFactorNumeric_SeqBAIJ_N,
2070        MatCholeskyFactorSymbolic_SeqBAIJ,
2071        MatCholeskyFactorNumeric_SeqBAIJ_N,
2072 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2073        MatILUFactorSymbolic_SeqBAIJ,
2074        MatICCFactorSymbolic_SeqBAIJ,
2075        MatGetArray_SeqBAIJ,
2076        MatRestoreArray_SeqBAIJ,
2077 /*35*/ MatDuplicate_SeqBAIJ,
2078        0,
2079        0,
2080        MatILUFactor_SeqBAIJ,
2081        0,
2082 /*40*/ MatAXPY_SeqBAIJ,
2083        MatGetSubMatrices_SeqBAIJ,
2084        MatIncreaseOverlap_SeqBAIJ,
2085        MatGetValues_SeqBAIJ,
2086        MatCopy_SeqBAIJ,
2087 /*45*/ MatPrintHelp_SeqBAIJ,
2088        MatScale_SeqBAIJ,
2089        0,
2090        0,
2091        0,
2092 /*50*/ 0,
2093        MatGetRowIJ_SeqBAIJ,
2094        MatRestoreRowIJ_SeqBAIJ,
2095        0,
2096        0,
2097 /*55*/ 0,
2098        0,
2099        0,
2100        0,
2101        MatSetValuesBlocked_SeqBAIJ,
2102 /*60*/ MatGetSubMatrix_SeqBAIJ,
2103        MatDestroy_SeqBAIJ,
2104        MatView_SeqBAIJ,
2105        0,
2106        0,
2107 /*65*/ 0,
2108        0,
2109        0,
2110        0,
2111        0,
2112 /*70*/ MatGetRowMax_SeqBAIJ,
2113        MatConvert_Basic,
2114        0,
2115        0,
2116        0,
2117 /*75*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*80*/ 0,
2123        0,
2124        0,
2125        0,
2126        MatLoad_SeqBAIJ,
2127 /*85*/ 0,
2128        0,
2129        0,
2130        0,
2131        0,
2132 /*90*/ 0,
2133        0,
2134        0,
2135        0,
2136        0,
2137 /*95*/ 0,
2138        0,
2139        0,
2140        0,
2141        0,
2142 /*100*/0,
2143        0,
2144        0,
2145        0,
2146        0,
2147 /*105*/0,
2148        MatRealPart_SeqBAIJ,
2149        MatImaginaryPart_SeqBAIJ
2150 };
2151 
2152 EXTERN_C_BEGIN
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2155 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2156 {
2157   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2158   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   if (aij->nonew != 1) {
2163     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2164   }
2165 
2166   /* allocate space for values if not already there */
2167   if (!aij->saved_values) {
2168     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2169   }
2170 
2171   /* copy values over */
2172   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 EXTERN_C_END
2176 
2177 EXTERN_C_BEGIN
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2180 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2181 {
2182   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2183   PetscErrorCode ierr;
2184   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2185 
2186   PetscFunctionBegin;
2187   if (aij->nonew != 1) {
2188     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2189   }
2190   if (!aij->saved_values) {
2191     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2192   }
2193 
2194   /* copy values over */
2195   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2196   PetscFunctionReturn(0);
2197 }
2198 EXTERN_C_END
2199 
2200 EXTERN_C_BEGIN
2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2202 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2203 EXTERN_C_END
2204 
2205 EXTERN_C_BEGIN
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2208 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2209 {
2210   Mat_SeqBAIJ    *b;
2211   PetscErrorCode ierr;
2212   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2213   PetscTruth     flg,skipallocation = PETSC_FALSE;
2214 
2215   PetscFunctionBegin;
2216 
2217   if (nz == MAT_SKIP_ALLOCATION) {
2218     skipallocation = PETSC_TRUE;
2219     nz             = 0;
2220   }
2221   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2222   if (nnz && newbs != bs) {
2223     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2224   }
2225   bs   = newbs;
2226 
2227   B->rmap.bs = B->cmap.bs = bs;
2228   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2229   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2230 
2231   B->preallocated = PETSC_TRUE;
2232 
2233   mbs  = B->rmap.n/bs;
2234   nbs  = B->cmap.n/bs;
2235   bs2  = bs*bs;
2236 
2237   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2238     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2239   }
2240 
2241   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2242   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2243   if (nnz) {
2244     for (i=0; i<mbs; i++) {
2245       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2246       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2247     }
2248   }
2249 
2250   b       = (Mat_SeqBAIJ*)B->data;
2251   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2252   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2253   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2254   if (!flg) {
2255     switch (bs) {
2256     case 1:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2258       B->ops->mult            = MatMult_SeqBAIJ_1;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2260       break;
2261     case 2:
2262       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2263       B->ops->mult            = MatMult_SeqBAIJ_2;
2264       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2265       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2266       break;
2267     case 3:
2268       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2269       B->ops->mult            = MatMult_SeqBAIJ_3;
2270       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2271       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2272       break;
2273     case 4:
2274       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2275       B->ops->mult            = MatMult_SeqBAIJ_4;
2276       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2277       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2278       break;
2279     case 5:
2280       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2281       B->ops->mult            = MatMult_SeqBAIJ_5;
2282       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2283       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2284       break;
2285     case 6:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2287       B->ops->mult            = MatMult_SeqBAIJ_6;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2289       break;
2290     case 7:
2291       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2292       B->ops->mult            = MatMult_SeqBAIJ_7;
2293       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2294       break;
2295     default:
2296       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2297       B->ops->mult            = MatMult_SeqBAIJ_N;
2298       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2299       break;
2300     }
2301   }
2302   B->rmap.bs      = bs;
2303   b->mbs     = mbs;
2304   b->nbs     = nbs;
2305   if (!skipallocation) {
2306     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2307     /* b->ilen will count nonzeros in each block row so far. */
2308     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2309     if (!nnz) {
2310       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2311       else if (nz <= 0)        nz = 1;
2312       for (i=0; i<mbs; i++) b->imax[i] = nz;
2313       nz = nz*mbs;
2314     } else {
2315       nz = 0;
2316       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2317     }
2318 
2319     /* allocate the matrix space */
2320     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2321     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2322     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2323     b->singlemalloc = PETSC_TRUE;
2324 
2325     b->i[0] = 0;
2326     for (i=1; i<mbs+1; i++) {
2327       b->i[i] = b->i[i-1] + b->imax[i-1];
2328     }
2329     b->free_a     = PETSC_TRUE;
2330     b->free_ij    = PETSC_TRUE;
2331   } else {
2332     b->free_a     = PETSC_FALSE;
2333     b->free_ij    = PETSC_FALSE;
2334   }
2335 
2336   B->rmap.bs          = bs;
2337   b->bs2              = bs2;
2338   b->mbs              = mbs;
2339   b->nz               = 0;
2340   b->maxnz            = nz*bs2;
2341   B->info.nz_unneeded = (PetscReal)b->maxnz;
2342   PetscFunctionReturn(0);
2343 }
2344 EXTERN_C_END
2345 
2346 /*MC
2347    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2348    block sparse compressed row format.
2349 
2350    Options Database Keys:
2351 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2352 
2353   Level: beginner
2354 
2355 .seealso: MatCreateSeqBAIJ()
2356 M*/
2357 
2358 EXTERN_C_BEGIN
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatCreate_SeqBAIJ"
2361 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2362 {
2363   PetscErrorCode ierr;
2364   PetscMPIInt    size;
2365   Mat_SeqBAIJ    *b;
2366 
2367   PetscFunctionBegin;
2368   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2369   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2370 
2371   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2372   B->data = (void*)b;
2373   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2374   B->factor           = 0;
2375   B->mapping          = 0;
2376   b->row              = 0;
2377   b->col              = 0;
2378   b->icol             = 0;
2379   b->reallocs         = 0;
2380   b->saved_values     = 0;
2381 #if defined(PETSC_USE_MAT_SINGLE)
2382   b->setvalueslen     = 0;
2383   b->setvaluescopy    = PETSC_NULL;
2384 #endif
2385 
2386   b->sorted           = PETSC_FALSE;
2387   b->roworiented      = PETSC_TRUE;
2388   b->nonew            = 0;
2389   b->diag             = 0;
2390   b->solve_work       = 0;
2391   b->mult_work        = 0;
2392   B->spptr            = 0;
2393   B->info.nz_unneeded = (PetscReal)b->maxnz;
2394   b->keepzeroedrows   = PETSC_FALSE;
2395   b->xtoy              = 0;
2396   b->XtoY              = 0;
2397   b->compressedrow.use     = PETSC_FALSE;
2398   b->compressedrow.nrows   = 0;
2399   b->compressedrow.i       = PETSC_NULL;
2400   b->compressedrow.rindex  = PETSC_NULL;
2401   b->compressedrow.checked = PETSC_FALSE;
2402   B->same_nonzero          = PETSC_FALSE;
2403 
2404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2405                                      "MatInvertBlockDiagonal_SeqBAIJ",
2406                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2407   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2408                                      "MatStoreValues_SeqBAIJ",
2409                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2411                                      "MatRetrieveValues_SeqBAIJ",
2412                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2414                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2415                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2417                                      "MatConvert_SeqBAIJ_SeqAIJ",
2418                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2420                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2421                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2423                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2424                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2425   PetscFunctionReturn(0);
2426 }
2427 EXTERN_C_END
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2431 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2432 {
2433   Mat            C;
2434   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2435   PetscErrorCode ierr;
2436   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2437 
2438   PetscFunctionBegin;
2439   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2440 
2441   *B = 0;
2442   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2443   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2444   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2445   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2446   c    = (Mat_SeqBAIJ*)C->data;
2447 
2448   C->rmap.N   = A->rmap.N;
2449   C->cmap.N   = A->cmap.N;
2450   C->rmap.bs  = A->rmap.bs;
2451   c->bs2 = a->bs2;
2452   c->mbs = a->mbs;
2453   c->nbs = a->nbs;
2454 
2455   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2456   for (i=0; i<mbs; i++) {
2457     c->imax[i] = a->imax[i];
2458     c->ilen[i] = a->ilen[i];
2459   }
2460 
2461   /* allocate the matrix space */
2462   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2463   c->singlemalloc = PETSC_TRUE;
2464   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2465   if (mbs > 0) {
2466     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2467     if (cpvalues == MAT_COPY_VALUES) {
2468       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2469     } else {
2470       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2471     }
2472   }
2473   c->sorted      = a->sorted;
2474   c->roworiented = a->roworiented;
2475   c->nonew       = a->nonew;
2476 
2477   if (a->diag) {
2478     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2479     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2480     for (i=0; i<mbs; i++) {
2481       c->diag[i] = a->diag[i];
2482     }
2483   } else c->diag        = 0;
2484   c->nz                 = a->nz;
2485   c->maxnz              = a->maxnz;
2486   c->solve_work         = 0;
2487   c->mult_work          = 0;
2488   c->free_a             = PETSC_TRUE;
2489   c->free_ij            = PETSC_TRUE;
2490   C->preallocated       = PETSC_TRUE;
2491   C->assembled          = PETSC_TRUE;
2492 
2493   c->compressedrow.use     = a->compressedrow.use;
2494   c->compressedrow.nrows   = a->compressedrow.nrows;
2495   c->compressedrow.checked = a->compressedrow.checked;
2496   if ( a->compressedrow.checked && a->compressedrow.use){
2497     i = a->compressedrow.nrows;
2498     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2499     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2500     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2501     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2502   } else {
2503     c->compressedrow.use    = PETSC_FALSE;
2504     c->compressedrow.i      = PETSC_NULL;
2505     c->compressedrow.rindex = PETSC_NULL;
2506   }
2507   C->same_nonzero = A->same_nonzero;
2508   *B = C;
2509   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2510   PetscFunctionReturn(0);
2511 }
2512 
2513 #undef __FUNCT__
2514 #define __FUNCT__ "MatLoad_SeqBAIJ"
2515 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2516 {
2517   Mat_SeqBAIJ    *a;
2518   Mat            B;
2519   PetscErrorCode ierr;
2520   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2521   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2522   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2523   PetscInt       *masked,nmask,tmp,bs2,ishift;
2524   PetscMPIInt    size;
2525   int            fd;
2526   PetscScalar    *aa;
2527   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2528 
2529   PetscFunctionBegin;
2530   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2531   bs2  = bs*bs;
2532 
2533   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2534   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2535   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2536   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2537   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2538   M = header[1]; N = header[2]; nz = header[3];
2539 
2540   if (header[3] < 0) {
2541     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2542   }
2543 
2544   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2545 
2546   /*
2547      This code adds extra rows to make sure the number of rows is
2548     divisible by the blocksize
2549   */
2550   mbs        = M/bs;
2551   extra_rows = bs - M + bs*(mbs);
2552   if (extra_rows == bs) extra_rows = 0;
2553   else                  mbs++;
2554   if (extra_rows) {
2555     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2556   }
2557 
2558   /* read in row lengths */
2559   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2560   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2561   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2562 
2563   /* read in column indices */
2564   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2565   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2566   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2567 
2568   /* loop over row lengths determining block row lengths */
2569   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2570   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2571   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2572   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2573   masked   = mask + mbs;
2574   rowcount = 0; nzcount = 0;
2575   for (i=0; i<mbs; i++) {
2576     nmask = 0;
2577     for (j=0; j<bs; j++) {
2578       kmax = rowlengths[rowcount];
2579       for (k=0; k<kmax; k++) {
2580         tmp = jj[nzcount++]/bs;
2581         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2582       }
2583       rowcount++;
2584     }
2585     browlengths[i] += nmask;
2586     /* zero out the mask elements we set */
2587     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2588   }
2589 
2590   /* create our matrix */
2591   ierr = MatCreate(comm,&B);
2592   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2593   ierr = MatSetType(B,type);CHKERRQ(ierr);
2594   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2595   a = (Mat_SeqBAIJ*)B->data;
2596 
2597   /* set matrix "i" values */
2598   a->i[0] = 0;
2599   for (i=1; i<= mbs; i++) {
2600     a->i[i]      = a->i[i-1] + browlengths[i-1];
2601     a->ilen[i-1] = browlengths[i-1];
2602   }
2603   a->nz         = 0;
2604   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2605 
2606   /* read in nonzero values */
2607   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2608   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2609   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2610 
2611   /* set "a" and "j" values into matrix */
2612   nzcount = 0; jcount = 0;
2613   for (i=0; i<mbs; i++) {
2614     nzcountb = nzcount;
2615     nmask    = 0;
2616     for (j=0; j<bs; j++) {
2617       kmax = rowlengths[i*bs+j];
2618       for (k=0; k<kmax; k++) {
2619         tmp = jj[nzcount++]/bs;
2620 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2621       }
2622     }
2623     /* sort the masked values */
2624     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2625 
2626     /* set "j" values into matrix */
2627     maskcount = 1;
2628     for (j=0; j<nmask; j++) {
2629       a->j[jcount++]  = masked[j];
2630       mask[masked[j]] = maskcount++;
2631     }
2632     /* set "a" values into matrix */
2633     ishift = bs2*a->i[i];
2634     for (j=0; j<bs; j++) {
2635       kmax = rowlengths[i*bs+j];
2636       for (k=0; k<kmax; k++) {
2637         tmp       = jj[nzcountb]/bs ;
2638         block     = mask[tmp] - 1;
2639         point     = jj[nzcountb] - bs*tmp;
2640         idx       = ishift + bs2*block + j + bs*point;
2641         a->a[idx] = (MatScalar)aa[nzcountb++];
2642       }
2643     }
2644     /* zero out the mask elements we set */
2645     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2646   }
2647   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2648 
2649   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2650   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2651   ierr = PetscFree(aa);CHKERRQ(ierr);
2652   ierr = PetscFree(jj);CHKERRQ(ierr);
2653   ierr = PetscFree(mask);CHKERRQ(ierr);
2654 
2655   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2656   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2657   ierr = MatView_Private(B);CHKERRQ(ierr);
2658 
2659   *A = B;
2660   PetscFunctionReturn(0);
2661 }
2662 
2663 #undef __FUNCT__
2664 #define __FUNCT__ "MatCreateSeqBAIJ"
2665 /*@C
2666    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2667    compressed row) format.  For good matrix assembly performance the
2668    user should preallocate the matrix storage by setting the parameter nz
2669    (or the array nnz).  By setting these parameters accurately, performance
2670    during matrix assembly can be increased by more than a factor of 50.
2671 
2672    Collective on MPI_Comm
2673 
2674    Input Parameters:
2675 +  comm - MPI communicator, set to PETSC_COMM_SELF
2676 .  bs - size of block
2677 .  m - number of rows
2678 .  n - number of columns
2679 .  nz - number of nonzero blocks  per block row (same for all rows)
2680 -  nnz - array containing the number of nonzero blocks in the various block rows
2681          (possibly different for each block row) or PETSC_NULL
2682 
2683    Output Parameter:
2684 .  A - the matrix
2685 
2686    Options Database Keys:
2687 .   -mat_no_unroll - uses code that does not unroll the loops in the
2688                      block calculations (much slower)
2689 .    -mat_block_size - size of the blocks to use
2690 
2691    Level: intermediate
2692 
2693    Notes:
2694    The number of rows and columns must be divisible by blocksize.
2695 
2696    If the nnz parameter is given then the nz parameter is ignored
2697 
2698    A nonzero block is any block that as 1 or more nonzeros in it
2699 
2700    The block AIJ format is fully compatible with standard Fortran 77
2701    storage.  That is, the stored row and column indices can begin at
2702    either one (as in Fortran) or zero.  See the users' manual for details.
2703 
2704    Specify the preallocated storage with either nz or nnz (not both).
2705    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2706    allocation.  For additional details, see the users manual chapter on
2707    matrices.
2708 
2709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2710 @*/
2711 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2712 {
2713   PetscErrorCode ierr;
2714 
2715   PetscFunctionBegin;
2716   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2717   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2718   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2719   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2725 /*@C
2726    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2727    per row in the matrix. For good matrix assembly performance the
2728    user should preallocate the matrix storage by setting the parameter nz
2729    (or the array nnz).  By setting these parameters accurately, performance
2730    during matrix assembly can be increased by more than a factor of 50.
2731 
2732    Collective on MPI_Comm
2733 
2734    Input Parameters:
2735 +  A - the matrix
2736 .  bs - size of block
2737 .  nz - number of block nonzeros per block row (same for all rows)
2738 -  nnz - array containing the number of block nonzeros in the various block rows
2739          (possibly different for each block row) or PETSC_NULL
2740 
2741    Options Database Keys:
2742 .   -mat_no_unroll - uses code that does not unroll the loops in the
2743                      block calculations (much slower)
2744 .    -mat_block_size - size of the blocks to use
2745 
2746    Level: intermediate
2747 
2748    Notes:
2749    If the nnz parameter is given then the nz parameter is ignored
2750 
2751    The block AIJ format is fully compatible with standard Fortran 77
2752    storage.  That is, the stored row and column indices can begin at
2753    either one (as in Fortran) or zero.  See the users' manual for details.
2754 
2755    Specify the preallocated storage with either nz or nnz (not both).
2756    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2757    allocation.  For additional details, see the users manual chapter on
2758    matrices.
2759 
2760 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2761 @*/
2762 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2763 {
2764   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2765 
2766   PetscFunctionBegin;
2767   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2768   if (f) {
2769     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2770   }
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 #undef __FUNCT__
2775 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2776 /*@
2777      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2778               (upper triangular entries in CSR format) provided by the user.
2779 
2780      Collective on MPI_Comm
2781 
2782    Input Parameters:
2783 +  comm - must be an MPI communicator of size 1
2784 .  bs - size of block
2785 .  m - number of rows
2786 .  n - number of columns
2787 .  i - row indices
2788 .  j - column indices
2789 -  a - matrix values
2790 
2791    Output Parameter:
2792 .  mat - the matrix
2793 
2794    Level: intermediate
2795 
2796    Notes:
2797        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2798     once the matrix is destroyed
2799 
2800        You cannot set new nonzero locations into this matrix, that will generate an error.
2801 
2802        The i and j indices are 0 based
2803 
2804 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2805 
2806 @*/
2807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2808 {
2809   PetscErrorCode ierr;
2810   PetscInt       ii;
2811   Mat_SeqBAIJ    *baij;
2812 
2813   PetscFunctionBegin;
2814   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2815   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2816 
2817   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2818   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2819   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2820   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2821   baij = (Mat_SeqBAIJ*)(*mat)->data;
2822   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2823 
2824   baij->i = i;
2825   baij->j = j;
2826   baij->a = a;
2827   baij->singlemalloc = PETSC_FALSE;
2828   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2829   baij->free_a       = PETSC_FALSE;
2830   baij->free_ij       = PETSC_FALSE;
2831 
2832   for (ii=0; ii<m; ii++) {
2833     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2834 #if defined(PETSC_USE_DEBUG)
2835     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2836 #endif
2837   }
2838 #if defined(PETSC_USE_DEBUG)
2839   for (ii=0; ii<baij->i[m]; ii++) {
2840     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2841     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2842   }
2843 #endif
2844 
2845   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2846   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849