xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 780a600e80509eba75ebd5b520fae1ebfeb16014)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
233   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
250       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
251       i2     = 3;
252       idiag += 9;
253       for (i=1; i<m; i++) {
254 	v     = aa + 9*ai[i];
255 	vi    = aj + ai[i];
256 	nz    = diag[i] - ai[i];
257 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
258 	while (nz--) {
259 	  idx  = 3*(*vi++);
260 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
261 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
262 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
263 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
264 	  v   += 9;
265 	}
266 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
267 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
268 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
269         idiag   += 9;
270         i2      += 3;
271       }
272       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
273       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
274     }
275     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
276         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
277       i2    = 0;
278       mdiag = a->idiag+9*a->mbs;
279       for (i=0; i<m; i++) {
280         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
281         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
282         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
283         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
284         mdiag  += 9;
285         i2     += 3;
286       }
287       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
288     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
289       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
290     }
291     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
292       idiag   = a->idiag+9*a->mbs - 9;
293       i2      = 3*m - 3;
294       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
295       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
296       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
297       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
298       idiag -= 9;
299       i2    -= 3;
300       for (i=m-2; i>=0; i--) {
301 	v     = aa + 9*(diag[i]+1);
302 	vi    = aj + diag[i] + 1;
303 	nz    = ai[i+1] - diag[i] - 1;
304 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
305 	while (nz--) {
306 	  idx  = 3*(*vi++);
307 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
308 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
309 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
310 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
311 	  v   += 9;
312 	}
313 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
314 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
315 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
316         idiag   -= 9;
317         i2      -= 3;
318       }
319       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
320     }
321   } else {
322     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
323   }
324   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
325   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
331 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
332 {
333   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
334   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
335   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
336   PetscErrorCode     ierr;
337   PetscInt           m = a->mbs,i,i2,nz,idx;
338   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
339 
340   PetscFunctionBegin;
341   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
342   its = its*lits;
343   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
344   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
345   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
346   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
347   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
348 
349   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
350 
351   diag  = a->diag;
352   idiag = a->idiag;
353   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
354   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
355 
356   if (flag & SOR_ZERO_INITIAL_GUESS) {
357     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
358       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
359       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
360       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
361       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
362       i2     = 4;
363       idiag += 16;
364       for (i=1; i<m; i++) {
365 	v     = aa + 16*ai[i];
366 	vi    = aj + ai[i];
367 	nz    = diag[i] - ai[i];
368 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
369 	while (nz--) {
370 	  idx  = 4*(*vi++);
371 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
372 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
373 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
374 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
375 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
376 	  v   += 16;
377 	}
378 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
379 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
380 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
381 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
382         idiag   += 16;
383         i2      += 4;
384       }
385       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
387     }
388     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390       i2    = 0;
391       mdiag = a->idiag+16*a->mbs;
392       for (i=0; i<m; i++) {
393         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
394         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
395         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
396         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
397         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
398         mdiag  += 16;
399         i2     += 4;
400       }
401       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
402     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
404     }
405     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406       idiag   = a->idiag+16*a->mbs - 16;
407       i2      = 4*m - 4;
408       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
409       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
410       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
411       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
412       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
413       idiag -= 16;
414       i2    -= 4;
415       for (i=m-2; i>=0; i--) {
416 	v     = aa + 16*(diag[i]+1);
417 	vi    = aj + diag[i] + 1;
418 	nz    = ai[i+1] - diag[i] - 1;
419 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
420 	while (nz--) {
421 	  idx  = 4*(*vi++);
422 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
423 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
424 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
425 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
426 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
427 	  v   += 16;
428 	}
429 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
430 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
431 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
432 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
433         idiag   -= 16;
434         i2      -= 4;
435       }
436       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
437     }
438   } else {
439     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
440   }
441   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
442   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
448 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
449 {
450   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
451   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
452   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
453   PetscErrorCode     ierr;
454   PetscInt           m = a->mbs,i,i2,nz,idx;
455   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
456 
457   PetscFunctionBegin;
458   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
459   its = its*lits;
460   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
461   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
462   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
463   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
464   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
465 
466   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
467 
468   diag  = a->diag;
469   idiag = a->idiag;
470   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
472 
473   if (flag & SOR_ZERO_INITIAL_GUESS) {
474     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
475       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
476       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
477       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
478       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
479       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
480       i2     = 5;
481       idiag += 25;
482       for (i=1; i<m; i++) {
483 	v     = aa + 25*ai[i];
484 	vi    = aj + ai[i];
485 	nz    = diag[i] - ai[i];
486 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
487 	while (nz--) {
488 	  idx  = 5*(*vi++);
489 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
490 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
491 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
492 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
493 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
494 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
495 	  v   += 25;
496 	}
497 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
498 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
499 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
500 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
501 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
502         idiag   += 25;
503         i2      += 5;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+25*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
514         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
515         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
516         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
517         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
518         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
519         mdiag  += 25;
520         i2     += 5;
521       }
522       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
523     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
524       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
525     }
526     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
527       idiag   = a->idiag+25*a->mbs - 25;
528       i2      = 5*m - 5;
529       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
530       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
531       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
532       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
533       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
534       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
535       idiag -= 25;
536       i2    -= 5;
537       for (i=m-2; i>=0; i--) {
538 	v     = aa + 25*(diag[i]+1);
539 	vi    = aj + diag[i] + 1;
540 	nz    = ai[i+1] - diag[i] - 1;
541 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
542 	while (nz--) {
543 	  idx  = 5*(*vi++);
544 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
545 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
546 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
547 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
548 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
549 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
550 	  v   += 25;
551 	}
552 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
553 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
554 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
555 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
556 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
557         idiag   -= 25;
558         i2      -= 5;
559       }
560       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
561     }
562   } else {
563     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
564   }
565   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
566   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 /*
571     Special version for direct calls from Fortran (Used in PETSc-fun3d)
572 */
573 #if defined(PETSC_HAVE_FORTRAN_CAPS)
574 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
575 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
576 #define matsetvaluesblocked4_ matsetvaluesblocked4
577 #endif
578 
579 EXTERN_C_BEGIN
580 #undef __FUNCT__
581 #define __FUNCT__ "matsetvaluesblocked4_"
582 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
583 {
584   Mat               A = *AA;
585   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
586   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
587   PetscInt          *ai=a->i,*ailen=a->ilen;
588   PetscInt          *aj=a->j,stepval,lastcol = -1;
589   const PetscScalar *value = v;
590   MatScalar         *ap,*aa = a->a,*bap;
591 
592   PetscFunctionBegin;
593   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
594   stepval = (n-1)*4;
595   for (k=0; k<m; k++) { /* loop over added rows */
596     row  = im[k];
597     rp   = aj + ai[row];
598     ap   = aa + 16*ai[row];
599     nrow = ailen[row];
600     low  = 0;
601     high = nrow;
602     for (l=0; l<n; l++) { /* loop over added columns */
603       col = in[l];
604       if (col <= lastcol) low = 0; else high = nrow;
605       lastcol = col;
606       value = v + k*(stepval+4 + l)*4;
607       while (high-low > 7) {
608         t = (low+high)/2;
609         if (rp[t] > col) high = t;
610         else             low  = t;
611       }
612       for (i=low; i<high; i++) {
613         if (rp[i] > col) break;
614         if (rp[i] == col) {
615           bap  = ap +  16*i;
616           for (ii=0; ii<4; ii++,value+=stepval) {
617             for (jj=ii; jj<16; jj+=4) {
618               bap[jj] += *value++;
619             }
620           }
621           goto noinsert2;
622         }
623       }
624       N = nrow++ - 1;
625       high++; /* added new column index thus must search to one higher than before */
626       /* shift up all the later entries in this row */
627       for (ii=N; ii>=i; ii--) {
628         rp[ii+1] = rp[ii];
629         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
630       }
631       if (N >= i) {
632         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
633       }
634       rp[i] = col;
635       bap   = ap +  16*i;
636       for (ii=0; ii<4; ii++,value+=stepval) {
637         for (jj=ii; jj<16; jj+=4) {
638           bap[jj] = *value++;
639         }
640       }
641       noinsert2:;
642       low = i;
643     }
644     ailen[row] = nrow;
645   }
646   PetscFunctionReturnVoid();
647 }
648 EXTERN_C_END
649 
650 #if defined(PETSC_HAVE_FORTRAN_CAPS)
651 #define matsetvalues4_ MATSETVALUES4
652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
653 #define matsetvalues4_ matsetvalues4
654 #endif
655 
656 EXTERN_C_BEGIN
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetValues4_"
659 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
660 {
661   Mat         A = *AA;
662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
664   PetscInt    *ai=a->i,*ailen=a->ilen;
665   PetscInt    *aj=a->j,brow,bcol;
666   PetscInt    ridx,cidx,lastcol = -1;
667   MatScalar   *ap,value,*aa=a->a,*bap;
668 
669   PetscFunctionBegin;
670   for (k=0; k<m; k++) { /* loop over added rows */
671     row  = im[k]; brow = row/4;
672     rp   = aj + ai[brow];
673     ap   = aa + 16*ai[brow];
674     nrow = ailen[brow];
675     low  = 0;
676     high = nrow;
677     for (l=0; l<n; l++) { /* loop over added columns */
678       col = in[l]; bcol = col/4;
679       ridx = row % 4; cidx = col % 4;
680       value = v[l + k*n];
681       if (col <= lastcol) low = 0; else high = nrow;
682       lastcol = col;
683       while (high-low > 7) {
684         t = (low+high)/2;
685         if (rp[t] > bcol) high = t;
686         else              low  = t;
687       }
688       for (i=low; i<high; i++) {
689         if (rp[i] > bcol) break;
690         if (rp[i] == bcol) {
691           bap  = ap +  16*i + 4*cidx + ridx;
692           *bap += value;
693           goto noinsert1;
694         }
695       }
696       N = nrow++ - 1;
697       high++; /* added new column thus must search to one higher than before */
698       /* shift up all the later entries in this row */
699       for (ii=N; ii>=i; ii--) {
700         rp[ii+1] = rp[ii];
701         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
702       }
703       if (N>=i) {
704         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
705       }
706       rp[i]                    = bcol;
707       ap[16*i + 4*cidx + ridx] = value;
708       noinsert1:;
709       low = i;
710     }
711     ailen[brow] = nrow;
712   }
713   PetscFunctionReturnVoid();
714 }
715 EXTERN_C_END
716 
717 /*  UGLY, ugly, ugly
718    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
719    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
720    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
721    converts the entries into single precision and then calls ..._MatScalar() to put them
722    into the single precision data structures.
723 */
724 #if defined(PETSC_USE_MAT_SINGLE)
725 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
726 #else
727 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
728 #endif
729 
730 #define CHUNKSIZE  10
731 
732 /*
733      Checks for missing diagonals
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
737 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
738 {
739   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
740   PetscErrorCode ierr;
741   PetscInt       *diag,*jj = a->j,i;
742 
743   PetscFunctionBegin;
744   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
745   *missing = PETSC_FALSE;
746   diag     = a->diag;
747   for (i=0; i<a->mbs; i++) {
748     if (jj[diag[i]] != i) {
749       *missing  = PETSC_TRUE;
750       if (d) *d = i;
751     }
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
758 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
759 {
760   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
761   PetscErrorCode ierr;
762   PetscInt       i,j,m = a->mbs;
763 
764   PetscFunctionBegin;
765   if (!a->diag) {
766     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
767   }
768   for (i=0; i<m; i++) {
769     a->diag[i] = a->i[i+1];
770     for (j=a->i[i]; j<a->i[i+1]; j++) {
771       if (a->j[j] == i) {
772         a->diag[i] = j;
773         break;
774       }
775     }
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 
781 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
785 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
786 {
787   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
788   PetscErrorCode ierr;
789   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
790   PetscInt     *tia, *tja;
791 
792   PetscFunctionBegin;
793   *nn = n;
794   if (!ia) PetscFunctionReturn(0);
795   if (symmetric) {
796     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
797   } else {
798     tia = a->i; tja = a->j;
799   }
800 
801   if (!blockcompressed) {
802     /* malloc & create the natural set of indices */
803     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
804     for (i=0; i<n+1; i++) {
805       for (j=0; j<bs; j++) {
806         *ia[i*bs+j] = tia[i]*bs+j;
807       }
808     }
809     for (i=0; i<nz; i++) {
810       for (j=0; j<bs; j++) {
811         *ja[i*bs+j] = tia[i]*bs+j;
812       }
813     }
814     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
815       ierr = PetscFree(tia);CHKERRQ(ierr);
816       ierr = PetscFree(tja);CHKERRQ(ierr);
817     }
818   } else {
819     *ia = tia;
820     *ja = tja;
821   }
822   if (oshift == 1) {
823     for (i=0; i<nz; i++) (*ja)[i]++;
824     for (i=0; i<n+1; i++) (*ia)[i]++;
825   }
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
831 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
832 {
833   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
834   PetscErrorCode ierr;
835   PetscInt       i,n = a->mbs,nz = a->i[n];
836 
837   PetscFunctionBegin;
838   if (!ia) PetscFunctionReturn(0);
839   if (!blockcompressed) {
840     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
841   }else if (symmetric) {
842     ierr = PetscFree(*ia);CHKERRQ(ierr);
843     ierr = PetscFree(*ja);CHKERRQ(ierr);
844   } else if (oshift == 1) { /* blockcompressed */
845     for (i=0; i<nz; i++) a->j[i]--;
846     for (i=0; i<n+1; i++) a->i[i]--;
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatDestroy_SeqBAIJ"
853 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
854 {
855   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
856   PetscErrorCode ierr;
857 
858   PetscFunctionBegin;
859 #if defined(PETSC_USE_LOG)
860   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
861 #endif
862   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
863   if (a->row) {
864     ierr = ISDestroy(a->row);CHKERRQ(ierr);
865   }
866   if (a->col) {
867     ierr = ISDestroy(a->col);CHKERRQ(ierr);
868   }
869   ierr = PetscFree(a->diag);CHKERRQ(ierr);
870   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
871   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
872   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
873   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
874   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
875   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
876 #if defined(PETSC_USE_MAT_SINGLE)
877   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
878 #endif
879   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
880   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
881 
882   ierr = PetscFree(a);CHKERRQ(ierr);
883 
884   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
887   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
888   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
889   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
890   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
891   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
892   PetscFunctionReturn(0);
893 }
894 
895 #undef __FUNCT__
896 #define __FUNCT__ "MatSetOption_SeqBAIJ"
897 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
898 {
899   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
900   PetscErrorCode ierr;
901 
902   PetscFunctionBegin;
903   switch (op) {
904   case MAT_ROW_ORIENTED:
905     a->roworiented    = flg;
906     break;
907   case MAT_KEEP_ZEROED_ROWS:
908     a->keepzeroedrows = flg;
909     break;
910   case MAT_NEW_NONZERO_LOCATIONS:
911     a->nonew          = (flg ? 0 : 1);
912     break;
913   case MAT_NEW_NONZERO_LOCATION_ERR:
914     a->nonew          = (flg ? -1 : 0);
915     break;
916   case MAT_NEW_NONZERO_ALLOCATION_ERR:
917     a->nonew          = (flg ? -2 : 0);
918     break;
919   case MAT_NEW_DIAGONALS:
920   case MAT_IGNORE_OFF_PROC_ENTRIES:
921   case MAT_USE_HASH_TABLE:
922     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
923     break;
924   case MAT_SYMMETRIC:
925   case MAT_STRUCTURALLY_SYMMETRIC:
926   case MAT_HERMITIAN:
927   case MAT_SYMMETRY_ETERNAL:
928     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
929     break;
930   default:
931     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
932   }
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatGetRow_SeqBAIJ"
938 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
939 {
940   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
941   PetscErrorCode ierr;
942   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
943   MatScalar      *aa,*aa_i;
944   PetscScalar    *v_i;
945 
946   PetscFunctionBegin;
947   bs  = A->rmap.bs;
948   ai  = a->i;
949   aj  = a->j;
950   aa  = a->a;
951   bs2 = a->bs2;
952 
953   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
954 
955   bn  = row/bs;   /* Block number */
956   bp  = row % bs; /* Block Position */
957   M   = ai[bn+1] - ai[bn];
958   *nz = bs*M;
959 
960   if (v) {
961     *v = 0;
962     if (*nz) {
963       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
964       for (i=0; i<M; i++) { /* for each block in the block row */
965         v_i  = *v + i*bs;
966         aa_i = aa + bs2*(ai[bn] + i);
967         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
968       }
969     }
970   }
971 
972   if (idx) {
973     *idx = 0;
974     if (*nz) {
975       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
976       for (i=0; i<M; i++) { /* for each block in the block row */
977         idx_i = *idx + i*bs;
978         itmp  = bs*aj[ai[bn] + i];
979         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
980       }
981     }
982   }
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
988 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
994   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
995   PetscFunctionReturn(0);
996 }
997 
998 #undef __FUNCT__
999 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1000 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1001 {
1002   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1003   Mat            C;
1004   PetscErrorCode ierr;
1005   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1006   PetscInt       *rows,*cols,bs2=a->bs2;
1007   PetscScalar    *array;
1008 
1009   PetscFunctionBegin;
1010   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1011   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1012   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1013 
1014 #if defined(PETSC_USE_MAT_SINGLE)
1015   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1016   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1017 #else
1018   array = a->a;
1019 #endif
1020 
1021   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1022   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1023   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1024   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1025   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1026   ierr = PetscFree(col);CHKERRQ(ierr);
1027   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1028   cols = rows + bs;
1029   for (i=0; i<mbs; i++) {
1030     cols[0] = i*bs;
1031     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1032     len = ai[i+1] - ai[i];
1033     for (j=0; j<len; j++) {
1034       rows[0] = (*aj++)*bs;
1035       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1036       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1037       array += bs2;
1038     }
1039   }
1040   ierr = PetscFree(rows);CHKERRQ(ierr);
1041 #if defined(PETSC_USE_MAT_SINGLE)
1042   ierr = PetscFree(array);
1043 #endif
1044 
1045   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1046   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1047 
1048   if (B) {
1049     *B = C;
1050   } else {
1051     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1052   }
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1058 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1059 {
1060   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1061   PetscErrorCode ierr;
1062   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1063   int            fd;
1064   PetscScalar    *aa;
1065   FILE           *file;
1066 
1067   PetscFunctionBegin;
1068   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1069   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1070   col_lens[0] = MAT_FILE_COOKIE;
1071 
1072   col_lens[1] = A->rmap.N;
1073   col_lens[2] = A->cmap.n;
1074   col_lens[3] = a->nz*bs2;
1075 
1076   /* store lengths of each row and write (including header) to file */
1077   count = 0;
1078   for (i=0; i<a->mbs; i++) {
1079     for (j=0; j<bs; j++) {
1080       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1081     }
1082   }
1083   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1084   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1085 
1086   /* store column indices (zero start index) */
1087   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1088   count = 0;
1089   for (i=0; i<a->mbs; i++) {
1090     for (j=0; j<bs; j++) {
1091       for (k=a->i[i]; k<a->i[i+1]; k++) {
1092         for (l=0; l<bs; l++) {
1093           jj[count++] = bs*a->j[k] + l;
1094         }
1095       }
1096     }
1097   }
1098   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1099   ierr = PetscFree(jj);CHKERRQ(ierr);
1100 
1101   /* store nonzero values */
1102   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1103   count = 0;
1104   for (i=0; i<a->mbs; i++) {
1105     for (j=0; j<bs; j++) {
1106       for (k=a->i[i]; k<a->i[i+1]; k++) {
1107         for (l=0; l<bs; l++) {
1108           aa[count++] = a->a[bs2*k + l*bs + j];
1109         }
1110       }
1111     }
1112   }
1113   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1114   ierr = PetscFree(aa);CHKERRQ(ierr);
1115 
1116   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1117   if (file) {
1118     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1119   }
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1125 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1126 {
1127   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1128   PetscErrorCode    ierr;
1129   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1130   PetscViewerFormat format;
1131 
1132   PetscFunctionBegin;
1133   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1134   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1135     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1136   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1137     Mat aij;
1138     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1139     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1140     ierr = MatDestroy(aij);CHKERRQ(ierr);
1141   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1142      PetscFunctionReturn(0);
1143   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1144     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1145     for (i=0; i<a->mbs; i++) {
1146       for (j=0; j<bs; j++) {
1147         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1148         for (k=a->i[i]; k<a->i[i+1]; k++) {
1149           for (l=0; l<bs; l++) {
1150 #if defined(PETSC_USE_COMPLEX)
1151             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1153                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1154             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1155               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1156                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1157             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1158               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1159             }
1160 #else
1161             if (a->a[bs2*k + l*bs + j] != 0.0) {
1162               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1163             }
1164 #endif
1165           }
1166         }
1167         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1168       }
1169     }
1170     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1171   } else {
1172     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1173     for (i=0; i<a->mbs; i++) {
1174       for (j=0; j<bs; j++) {
1175         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1176         for (k=a->i[i]; k<a->i[i+1]; k++) {
1177           for (l=0; l<bs; l++) {
1178 #if defined(PETSC_USE_COMPLEX)
1179             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1180               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1181                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1182             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1183               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1184                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1185             } else {
1186               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1187             }
1188 #else
1189             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1190 #endif
1191           }
1192         }
1193         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1194       }
1195     }
1196     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1197   }
1198   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 #undef __FUNCT__
1203 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1204 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1205 {
1206   Mat            A = (Mat) Aa;
1207   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1208   PetscErrorCode ierr;
1209   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1210   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1211   MatScalar      *aa;
1212   PetscViewer    viewer;
1213 
1214   PetscFunctionBegin;
1215 
1216   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1217   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1218 
1219   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1220 
1221   /* loop over matrix elements drawing boxes */
1222   color = PETSC_DRAW_BLUE;
1223   for (i=0,row=0; i<mbs; i++,row+=bs) {
1224     for (j=a->i[i]; j<a->i[i+1]; j++) {
1225       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1226       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1227       aa = a->a + j*bs2;
1228       for (k=0; k<bs; k++) {
1229         for (l=0; l<bs; l++) {
1230           if (PetscRealPart(*aa++) >=  0.) continue;
1231           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1232         }
1233       }
1234     }
1235   }
1236   color = PETSC_DRAW_CYAN;
1237   for (i=0,row=0; i<mbs; i++,row+=bs) {
1238     for (j=a->i[i]; j<a->i[i+1]; j++) {
1239       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1240       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1241       aa = a->a + j*bs2;
1242       for (k=0; k<bs; k++) {
1243         for (l=0; l<bs; l++) {
1244           if (PetscRealPart(*aa++) != 0.) continue;
1245           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1246         }
1247       }
1248     }
1249   }
1250 
1251   color = PETSC_DRAW_RED;
1252   for (i=0,row=0; i<mbs; i++,row+=bs) {
1253     for (j=a->i[i]; j<a->i[i+1]; j++) {
1254       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1255       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1256       aa = a->a + j*bs2;
1257       for (k=0; k<bs; k++) {
1258         for (l=0; l<bs; l++) {
1259           if (PetscRealPart(*aa++) <= 0.) continue;
1260           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1261         }
1262       }
1263     }
1264   }
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1270 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1271 {
1272   PetscErrorCode ierr;
1273   PetscReal      xl,yl,xr,yr,w,h;
1274   PetscDraw      draw;
1275   PetscTruth     isnull;
1276 
1277   PetscFunctionBegin;
1278 
1279   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1280   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1281 
1282   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1283   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1284   xr += w;    yr += h;  xl = -w;     yl = -h;
1285   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1286   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1287   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 #undef __FUNCT__
1292 #define __FUNCT__ "MatView_SeqBAIJ"
1293 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1294 {
1295   PetscErrorCode ierr;
1296   PetscTruth     iascii,isbinary,isdraw;
1297 
1298   PetscFunctionBegin;
1299   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1300   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1301   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1302   if (iascii){
1303     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1304   } else if (isbinary) {
1305     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1306   } else if (isdraw) {
1307     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1308   } else {
1309     Mat B;
1310     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1311     ierr = MatView(B,viewer);CHKERRQ(ierr);
1312     ierr = MatDestroy(B);CHKERRQ(ierr);
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1320 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1321 {
1322   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1323   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1324   PetscInt    *ai = a->i,*ailen = a->ilen;
1325   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1326   MatScalar   *ap,*aa = a->a;
1327 
1328   PetscFunctionBegin;
1329   for (k=0; k<m; k++) { /* loop over rows */
1330     row  = im[k]; brow = row/bs;
1331     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1332     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1333     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1334     nrow = ailen[brow];
1335     for (l=0; l<n; l++) { /* loop over columns */
1336       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1337       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1338       col  = in[l] ;
1339       bcol = col/bs;
1340       cidx = col%bs;
1341       ridx = row%bs;
1342       high = nrow;
1343       low  = 0; /* assume unsorted */
1344       while (high-low > 5) {
1345         t = (low+high)/2;
1346         if (rp[t] > bcol) high = t;
1347         else             low  = t;
1348       }
1349       for (i=low; i<high; i++) {
1350         if (rp[i] > bcol) break;
1351         if (rp[i] == bcol) {
1352           *v++ = ap[bs2*i+bs*cidx+ridx];
1353           goto finished;
1354         }
1355       }
1356       *v++ = 0.0;
1357       finished:;
1358     }
1359   }
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #if defined(PETSC_USE_MAT_SINGLE)
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1366 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1367 {
1368   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1369   PetscErrorCode ierr;
1370   PetscInt       i,N = m*n*b->bs2;
1371   MatScalar      *vsingle;
1372 
1373   PetscFunctionBegin;
1374   if (N > b->setvalueslen) {
1375     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1376     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1377     b->setvalueslen  = N;
1378   }
1379   vsingle = b->setvaluescopy;
1380   for (i=0; i<N; i++) {
1381     vsingle[i] = v[i];
1382   }
1383   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 #endif
1387 
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1391 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1392 {
1393   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1394   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1395   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1396   PetscErrorCode  ierr;
1397   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1398   PetscTruth      roworiented=a->roworiented;
1399   const MatScalar *value = v;
1400   MatScalar       *ap,*aa = a->a,*bap;
1401 
1402   PetscFunctionBegin;
1403   if (roworiented) {
1404     stepval = (n-1)*bs;
1405   } else {
1406     stepval = (m-1)*bs;
1407   }
1408   for (k=0; k<m; k++) { /* loop over added rows */
1409     row  = im[k];
1410     if (row < 0) continue;
1411 #if defined(PETSC_USE_DEBUG)
1412     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1413 #endif
1414     rp   = aj + ai[row];
1415     ap   = aa + bs2*ai[row];
1416     rmax = imax[row];
1417     nrow = ailen[row];
1418     low  = 0;
1419     high = nrow;
1420     for (l=0; l<n; l++) { /* loop over added columns */
1421       if (in[l] < 0) continue;
1422 #if defined(PETSC_USE_DEBUG)
1423       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1424 #endif
1425       col = in[l];
1426       if (roworiented) {
1427         value = v + k*(stepval+bs)*bs + l*bs;
1428       } else {
1429         value = v + l*(stepval+bs)*bs + k*bs;
1430       }
1431       if (col <= lastcol) low = 0; else high = nrow;
1432       lastcol = col;
1433       while (high-low > 7) {
1434         t = (low+high)/2;
1435         if (rp[t] > col) high = t;
1436         else             low  = t;
1437       }
1438       for (i=low; i<high; i++) {
1439         if (rp[i] > col) break;
1440         if (rp[i] == col) {
1441           bap  = ap +  bs2*i;
1442           if (roworiented) {
1443             if (is == ADD_VALUES) {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=ii; jj<bs2; jj+=bs) {
1446                   bap[jj] += *value++;
1447                 }
1448               }
1449             } else {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=ii; jj<bs2; jj+=bs) {
1452                   bap[jj] = *value++;
1453                 }
1454               }
1455             }
1456           } else {
1457             if (is == ADD_VALUES) {
1458               for (ii=0; ii<bs; ii++,value+=stepval) {
1459                 for (jj=0; jj<bs; jj++) {
1460                   *bap++ += *value++;
1461                 }
1462               }
1463             } else {
1464               for (ii=0; ii<bs; ii++,value+=stepval) {
1465                 for (jj=0; jj<bs; jj++) {
1466                   *bap++  = *value++;
1467                 }
1468               }
1469             }
1470           }
1471           goto noinsert2;
1472         }
1473       }
1474       if (nonew == 1) goto noinsert2;
1475       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1476       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1477       N = nrow++ - 1; high++;
1478       /* shift up all the later entries in this row */
1479       for (ii=N; ii>=i; ii--) {
1480         rp[ii+1] = rp[ii];
1481         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1482       }
1483       if (N >= i) {
1484         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1485       }
1486       rp[i] = col;
1487       bap   = ap +  bs2*i;
1488       if (roworiented) {
1489         for (ii=0; ii<bs; ii++,value+=stepval) {
1490           for (jj=ii; jj<bs2; jj+=bs) {
1491             bap[jj] = *value++;
1492           }
1493         }
1494       } else {
1495         for (ii=0; ii<bs; ii++,value+=stepval) {
1496           for (jj=0; jj<bs; jj++) {
1497             *bap++  = *value++;
1498           }
1499         }
1500       }
1501       noinsert2:;
1502       low = i;
1503     }
1504     ailen[row] = nrow;
1505   }
1506   PetscFunctionReturn(0);
1507 }
1508 
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1511 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1512 {
1513   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1514   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1515   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1516   PetscErrorCode ierr;
1517   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1518   MatScalar      *aa = a->a,*ap;
1519   PetscReal      ratio=0.6;
1520 
1521   PetscFunctionBegin;
1522   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1523 
1524   if (m) rmax = ailen[0];
1525   for (i=1; i<mbs; i++) {
1526     /* move each row back by the amount of empty slots (fshift) before it*/
1527     fshift += imax[i-1] - ailen[i-1];
1528     rmax   = PetscMax(rmax,ailen[i]);
1529     if (fshift) {
1530       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1531       N = ailen[i];
1532       for (j=0; j<N; j++) {
1533         ip[j-fshift] = ip[j];
1534         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1535       }
1536     }
1537     ai[i] = ai[i-1] + ailen[i-1];
1538   }
1539   if (mbs) {
1540     fshift += imax[mbs-1] - ailen[mbs-1];
1541     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1542   }
1543   /* reset ilen and imax for each row */
1544   for (i=0; i<mbs; i++) {
1545     ailen[i] = imax[i] = ai[i+1] - ai[i];
1546   }
1547   a->nz = ai[mbs];
1548 
1549   /* diagonals may have moved, so kill the diagonal pointers */
1550   a->idiagvalid = PETSC_FALSE;
1551   if (fshift && a->diag) {
1552     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1553     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1554     a->diag = 0;
1555   }
1556   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1557   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1558   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1559   a->reallocs          = 0;
1560   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1561 
1562   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1563   if (a->compressedrow.use){
1564     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1565   }
1566 
1567   A->same_nonzero = PETSC_TRUE;
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 /*
1572    This function returns an array of flags which indicate the locations of contiguous
1573    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1574    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1575    Assume: sizes should be long enough to hold all the values.
1576 */
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1579 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1580 {
1581   PetscInt   i,j,k,row;
1582   PetscTruth flg;
1583 
1584   PetscFunctionBegin;
1585   for (i=0,j=0; i<n; j++) {
1586     row = idx[i];
1587     if (row%bs!=0) { /* Not the begining of a block */
1588       sizes[j] = 1;
1589       i++;
1590     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1591       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1592       i++;
1593     } else { /* Begining of the block, so check if the complete block exists */
1594       flg = PETSC_TRUE;
1595       for (k=1; k<bs; k++) {
1596         if (row+k != idx[i+k]) { /* break in the block */
1597           flg = PETSC_FALSE;
1598           break;
1599         }
1600       }
1601       if (flg) { /* No break in the bs */
1602         sizes[j] = bs;
1603         i+= bs;
1604       } else {
1605         sizes[j] = 1;
1606         i++;
1607       }
1608     }
1609   }
1610   *bs_max = j;
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1616 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1617 {
1618   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1619   PetscErrorCode ierr;
1620   PetscInt       i,j,k,count,*rows;
1621   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1622   PetscScalar    zero = 0.0;
1623   MatScalar      *aa;
1624 
1625   PetscFunctionBegin;
1626   /* Make a copy of the IS and  sort it */
1627   /* allocate memory for rows,sizes */
1628   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1629   sizes = rows + is_n;
1630 
1631   /* copy IS values to rows, and sort them */
1632   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1633   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1634   if (baij->keepzeroedrows) {
1635     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1636     bs_max = is_n;
1637     A->same_nonzero = PETSC_TRUE;
1638   } else {
1639     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1640     A->same_nonzero = PETSC_FALSE;
1641   }
1642 
1643   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1644     row   = rows[j];
1645     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1646     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1647     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1648     if (sizes[i] == bs && !baij->keepzeroedrows) {
1649       if (diag != 0.0) {
1650         if (baij->ilen[row/bs] > 0) {
1651           baij->ilen[row/bs]       = 1;
1652           baij->j[baij->i[row/bs]] = row/bs;
1653           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1654         }
1655         /* Now insert all the diagonal values for this bs */
1656         for (k=0; k<bs; k++) {
1657           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1658         }
1659       } else { /* (diag == 0.0) */
1660         baij->ilen[row/bs] = 0;
1661       } /* end (diag == 0.0) */
1662     } else { /* (sizes[i] != bs) */
1663 #if defined (PETSC_USE_DEBUG)
1664       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1665 #endif
1666       for (k=0; k<count; k++) {
1667         aa[0] =  zero;
1668         aa    += bs;
1669       }
1670       if (diag != 0.0) {
1671         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1672       }
1673     }
1674   }
1675 
1676   ierr = PetscFree(rows);CHKERRQ(ierr);
1677   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1683 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1684 {
1685   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1686   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1687   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1688   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1689   PetscErrorCode ierr;
1690   PetscInt       ridx,cidx,bs2=a->bs2;
1691   PetscTruth     roworiented=a->roworiented;
1692   MatScalar      *ap,value,*aa=a->a,*bap;
1693 
1694   PetscFunctionBegin;
1695   for (k=0; k<m; k++) { /* loop over added rows */
1696     row  = im[k];
1697     brow = row/bs;
1698     if (row < 0) continue;
1699 #if defined(PETSC_USE_DEBUG)
1700     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1701 #endif
1702     rp   = aj + ai[brow];
1703     ap   = aa + bs2*ai[brow];
1704     rmax = imax[brow];
1705     nrow = ailen[brow];
1706     low  = 0;
1707     high = nrow;
1708     for (l=0; l<n; l++) { /* loop over added columns */
1709       if (in[l] < 0) continue;
1710 #if defined(PETSC_USE_DEBUG)
1711       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1712 #endif
1713       col = in[l]; bcol = col/bs;
1714       ridx = row % bs; cidx = col % bs;
1715       if (roworiented) {
1716         value = v[l + k*n];
1717       } else {
1718         value = v[k + l*m];
1719       }
1720       if (col <= lastcol) low = 0; else high = nrow;
1721       lastcol = col;
1722       while (high-low > 7) {
1723         t = (low+high)/2;
1724         if (rp[t] > bcol) high = t;
1725         else              low  = t;
1726       }
1727       for (i=low; i<high; i++) {
1728         if (rp[i] > bcol) break;
1729         if (rp[i] == bcol) {
1730           bap  = ap +  bs2*i + bs*cidx + ridx;
1731           if (is == ADD_VALUES) *bap += value;
1732           else                  *bap  = value;
1733           goto noinsert1;
1734         }
1735       }
1736       if (nonew == 1) goto noinsert1;
1737       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1738       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1739       N = nrow++ - 1; high++;
1740       /* shift up all the later entries in this row */
1741       for (ii=N; ii>=i; ii--) {
1742         rp[ii+1] = rp[ii];
1743         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1744       }
1745       if (N>=i) {
1746         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1747       }
1748       rp[i]                      = bcol;
1749       ap[bs2*i + bs*cidx + ridx] = value;
1750       a->nz++;
1751       noinsert1:;
1752       low = i;
1753     }
1754     ailen[brow] = nrow;
1755   }
1756   A->same_nonzero = PETSC_FALSE;
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1763 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1764 {
1765   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1766   Mat            outA;
1767   PetscErrorCode ierr;
1768   PetscTruth     row_identity,col_identity;
1769 
1770   PetscFunctionBegin;
1771   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1772   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1773   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1774   if (!row_identity || !col_identity) {
1775     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1776   }
1777 
1778   outA          = inA;
1779   inA->factor   = FACTOR_LU;
1780 
1781   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1782 
1783   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1784   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1785   a->row = row;
1786   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1787   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1788   a->col = col;
1789 
1790   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1791   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1792   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1793 
1794   /*
1795       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1796       for ILU(0) factorization with natural ordering
1797   */
1798   if (inA->rmap.bs < 8) {
1799     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1800   } else {
1801     if (!a->solve_work) {
1802       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1803       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1804     }
1805   }
1806 
1807   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 
1812 EXTERN_C_BEGIN
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1815 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1816 {
1817   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1818   PetscInt    i,nz,nbs;
1819 
1820   PetscFunctionBegin;
1821   nz  = baij->maxnz/baij->bs2;
1822   nbs = baij->nbs;
1823   for (i=0; i<nz; i++) {
1824     baij->j[i] = indices[i];
1825   }
1826   baij->nz = nz;
1827   for (i=0; i<nbs; i++) {
1828     baij->ilen[i] = baij->imax[i];
1829   }
1830 
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1837 /*@
1838     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1839        in the matrix.
1840 
1841   Input Parameters:
1842 +  mat - the SeqBAIJ matrix
1843 -  indices - the column indices
1844 
1845   Level: advanced
1846 
1847   Notes:
1848     This can be called if you have precomputed the nonzero structure of the
1849   matrix and want to provide it to the matrix object to improve the performance
1850   of the MatSetValues() operation.
1851 
1852     You MUST have set the correct numbers of nonzeros per row in the call to
1853   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1854 
1855     MUST be called before any calls to MatSetValues();
1856 
1857 @*/
1858 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1859 {
1860   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1861 
1862   PetscFunctionBegin;
1863   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1864   PetscValidPointer(indices,2);
1865   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1866   if (f) {
1867     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1868   } else {
1869     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1870   }
1871   PetscFunctionReturn(0);
1872 }
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1876 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1877 {
1878   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1879   PetscErrorCode ierr;
1880   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1881   PetscReal      atmp;
1882   PetscScalar    *x,zero = 0.0;
1883   MatScalar      *aa;
1884   PetscInt       ncols,brow,krow,kcol;
1885 
1886   PetscFunctionBegin;
1887   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1888   bs   = A->rmap.bs;
1889   aa   = a->a;
1890   ai   = a->i;
1891   aj   = a->j;
1892   mbs  = a->mbs;
1893 
1894   ierr = VecSet(v,zero);CHKERRQ(ierr);
1895   if (idx) {
1896     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1897   }
1898   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1899   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1900   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1901   for (i=0; i<mbs; i++) {
1902     ncols = ai[1] - ai[0]; ai++;
1903     brow  = bs*i;
1904     for (j=0; j<ncols; j++){
1905       for (kcol=0; kcol<bs; kcol++){
1906         for (krow=0; krow<bs; krow++){
1907           atmp = PetscAbsScalar(*aa);aa++;
1908           row = brow + krow;    /* row index */
1909           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1910           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1911         }
1912       }
1913       aj++;
1914     }
1915   }
1916   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 #undef __FUNCT__
1921 #define __FUNCT__ "MatCopy_SeqBAIJ"
1922 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1923 {
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   /* If the two matrices have the same copy implementation, use fast copy. */
1928   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1929     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1930     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1931 
1932     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1933       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1934     }
1935     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1936   } else {
1937     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1944 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1945 {
1946   PetscErrorCode ierr;
1947 
1948   PetscFunctionBegin;
1949   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1955 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1956 {
1957   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1958   PetscFunctionBegin;
1959   *array = a->a;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #undef __FUNCT__
1964 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1965 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1966 {
1967   PetscFunctionBegin;
1968   PetscFunctionReturn(0);
1969 }
1970 
1971 #include "petscblaslapack.h"
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1974 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1975 {
1976   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1977   PetscErrorCode ierr;
1978   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1979   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1980 
1981   PetscFunctionBegin;
1982   if (str == SAME_NONZERO_PATTERN) {
1983     PetscScalar alpha = a;
1984     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1985   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1986     if (y->xtoy && y->XtoY != X) {
1987       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1988       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1989     }
1990     if (!y->xtoy) { /* get xtoy */
1991       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1992       y->XtoY = X;
1993     }
1994     bs2 = bs*bs;
1995     for (i=0; i<x->nz; i++) {
1996       j = 0;
1997       while (j < bs2){
1998         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1999         j++;
2000       }
2001     }
2002     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2003   } else {
2004     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2005   }
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2011 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2012 {
2013   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2014   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2015   PetscScalar    *aa = a->a;
2016 
2017   PetscFunctionBegin;
2018   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 #undef __FUNCT__
2023 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2024 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2025 {
2026   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2027   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2028   PetscScalar    *aa = a->a;
2029 
2030   PetscFunctionBegin;
2031   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 
2036 /* -------------------------------------------------------------------*/
2037 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2038        MatGetRow_SeqBAIJ,
2039        MatRestoreRow_SeqBAIJ,
2040        MatMult_SeqBAIJ_N,
2041 /* 4*/ MatMultAdd_SeqBAIJ_N,
2042        MatMultTranspose_SeqBAIJ,
2043        MatMultTransposeAdd_SeqBAIJ,
2044        MatSolve_SeqBAIJ_N,
2045        0,
2046        0,
2047 /*10*/ 0,
2048        MatLUFactor_SeqBAIJ,
2049        0,
2050        0,
2051        MatTranspose_SeqBAIJ,
2052 /*15*/ MatGetInfo_SeqBAIJ,
2053        MatEqual_SeqBAIJ,
2054        MatGetDiagonal_SeqBAIJ,
2055        MatDiagonalScale_SeqBAIJ,
2056        MatNorm_SeqBAIJ,
2057 /*20*/ 0,
2058        MatAssemblyEnd_SeqBAIJ,
2059        0,
2060        MatSetOption_SeqBAIJ,
2061        MatZeroEntries_SeqBAIJ,
2062 /*25*/ MatZeroRows_SeqBAIJ,
2063        MatLUFactorSymbolic_SeqBAIJ,
2064        MatLUFactorNumeric_SeqBAIJ_N,
2065        MatCholeskyFactorSymbolic_SeqBAIJ,
2066        MatCholeskyFactorNumeric_SeqBAIJ_N,
2067 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2068        MatILUFactorSymbolic_SeqBAIJ,
2069        MatICCFactorSymbolic_SeqBAIJ,
2070        MatGetArray_SeqBAIJ,
2071        MatRestoreArray_SeqBAIJ,
2072 /*35*/ MatDuplicate_SeqBAIJ,
2073        0,
2074        0,
2075        MatILUFactor_SeqBAIJ,
2076        0,
2077 /*40*/ MatAXPY_SeqBAIJ,
2078        MatGetSubMatrices_SeqBAIJ,
2079        MatIncreaseOverlap_SeqBAIJ,
2080        MatGetValues_SeqBAIJ,
2081        MatCopy_SeqBAIJ,
2082 /*45*/ 0,
2083        MatScale_SeqBAIJ,
2084        0,
2085        0,
2086        0,
2087 /*50*/ 0,
2088        MatGetRowIJ_SeqBAIJ,
2089        MatRestoreRowIJ_SeqBAIJ,
2090        0,
2091        0,
2092 /*55*/ 0,
2093        0,
2094        0,
2095        0,
2096        MatSetValuesBlocked_SeqBAIJ,
2097 /*60*/ MatGetSubMatrix_SeqBAIJ,
2098        MatDestroy_SeqBAIJ,
2099        MatView_SeqBAIJ,
2100        0,
2101        0,
2102 /*65*/ 0,
2103        0,
2104        0,
2105        0,
2106        0,
2107 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2108        MatConvert_Basic,
2109        0,
2110        0,
2111        0,
2112 /*75*/ 0,
2113        0,
2114        0,
2115        0,
2116        0,
2117 /*80*/ 0,
2118        0,
2119        0,
2120        0,
2121        MatLoad_SeqBAIJ,
2122 /*85*/ 0,
2123        0,
2124        0,
2125        0,
2126        0,
2127 /*90*/ 0,
2128        0,
2129        0,
2130        0,
2131        0,
2132 /*95*/ 0,
2133        0,
2134        0,
2135        0,
2136        0,
2137 /*100*/0,
2138        0,
2139        0,
2140        0,
2141        0,
2142 /*105*/0,
2143        MatRealPart_SeqBAIJ,
2144        MatImaginaryPart_SeqBAIJ,
2145        0,
2146        0,
2147 /*110*/0,
2148        0,
2149        0,
2150        0,
2151        MatMissingDiagonal_SeqBAIJ
2152 /*115*/
2153 };
2154 
2155 EXTERN_C_BEGIN
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2158 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2159 {
2160   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2161   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2162   PetscErrorCode ierr;
2163 
2164   PetscFunctionBegin;
2165   if (aij->nonew != 1) {
2166     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2167   }
2168 
2169   /* allocate space for values if not already there */
2170   if (!aij->saved_values) {
2171     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2172   }
2173 
2174   /* copy values over */
2175   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2176   PetscFunctionReturn(0);
2177 }
2178 EXTERN_C_END
2179 
2180 EXTERN_C_BEGIN
2181 #undef __FUNCT__
2182 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2183 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2184 {
2185   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2186   PetscErrorCode ierr;
2187   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2188 
2189   PetscFunctionBegin;
2190   if (aij->nonew != 1) {
2191     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2192   }
2193   if (!aij->saved_values) {
2194     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2195   }
2196 
2197   /* copy values over */
2198   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2199   PetscFunctionReturn(0);
2200 }
2201 EXTERN_C_END
2202 
2203 EXTERN_C_BEGIN
2204 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2205 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2206 EXTERN_C_END
2207 
2208 EXTERN_C_BEGIN
2209 #undef __FUNCT__
2210 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2211 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2212 {
2213   Mat_SeqBAIJ    *b;
2214   PetscErrorCode ierr;
2215   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2216   PetscTruth     flg,skipallocation = PETSC_FALSE;
2217 
2218   PetscFunctionBegin;
2219 
2220   if (nz == MAT_SKIP_ALLOCATION) {
2221     skipallocation = PETSC_TRUE;
2222     nz             = 0;
2223   }
2224 
2225   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2226     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2227   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2228 
2229   if (nnz && newbs != bs) {
2230     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2231   }
2232   bs   = newbs;
2233 
2234   B->rmap.bs = B->cmap.bs = bs;
2235   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2236   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2237 
2238   B->preallocated = PETSC_TRUE;
2239 
2240   mbs  = B->rmap.n/bs;
2241   nbs  = B->cmap.n/bs;
2242   bs2  = bs*bs;
2243 
2244   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2245     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2246   }
2247 
2248   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2249   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2250   if (nnz) {
2251     for (i=0; i<mbs; i++) {
2252       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2253       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2254     }
2255   }
2256 
2257   b       = (Mat_SeqBAIJ*)B->data;
2258   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2259     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2260   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2261 
2262   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2263   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2264   if (!flg) {
2265     switch (bs) {
2266     case 1:
2267       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2268       B->ops->mult            = MatMult_SeqBAIJ_1;
2269       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2270       break;
2271     case 2:
2272       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2273       B->ops->mult            = MatMult_SeqBAIJ_2;
2274       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2275       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2276       break;
2277     case 3:
2278       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2279       B->ops->mult            = MatMult_SeqBAIJ_3;
2280       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2281       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2282       break;
2283     case 4:
2284       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2285       B->ops->mult            = MatMult_SeqBAIJ_4;
2286       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2287       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2288       break;
2289     case 5:
2290       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2291       B->ops->mult            = MatMult_SeqBAIJ_5;
2292       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2293       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2294       break;
2295     case 6:
2296       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2297       B->ops->mult            = MatMult_SeqBAIJ_6;
2298       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2299       break;
2300     case 7:
2301       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2302       B->ops->mult            = MatMult_SeqBAIJ_7;
2303       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2304       break;
2305     default:
2306       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2307       B->ops->mult            = MatMult_SeqBAIJ_N;
2308       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2309       break;
2310     }
2311   }
2312   B->rmap.bs      = bs;
2313   b->mbs     = mbs;
2314   b->nbs     = nbs;
2315   if (!skipallocation) {
2316     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2317     /* b->ilen will count nonzeros in each block row so far. */
2318     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2319     if (!nnz) {
2320       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2321       else if (nz <= 0)        nz = 1;
2322       for (i=0; i<mbs; i++) b->imax[i] = nz;
2323       nz = nz*mbs;
2324     } else {
2325       nz = 0;
2326       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2327     }
2328 
2329     /* allocate the matrix space */
2330     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2331     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2332     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2333     b->singlemalloc = PETSC_TRUE;
2334 
2335     b->i[0] = 0;
2336     for (i=1; i<mbs+1; i++) {
2337       b->i[i] = b->i[i-1] + b->imax[i-1];
2338     }
2339     b->free_a     = PETSC_TRUE;
2340     b->free_ij    = PETSC_TRUE;
2341   } else {
2342     b->free_a     = PETSC_FALSE;
2343     b->free_ij    = PETSC_FALSE;
2344   }
2345 
2346   B->rmap.bs          = bs;
2347   b->bs2              = bs2;
2348   b->mbs              = mbs;
2349   b->nz               = 0;
2350   b->maxnz            = nz*bs2;
2351   B->info.nz_unneeded = (PetscReal)b->maxnz;
2352   PetscFunctionReturn(0);
2353 }
2354 EXTERN_C_END
2355 
2356 /*MC
2357    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2358    block sparse compressed row format.
2359 
2360    Options Database Keys:
2361 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2362 
2363   Level: beginner
2364 
2365 .seealso: MatCreateSeqBAIJ()
2366 M*/
2367 
2368 EXTERN_C_BEGIN
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatCreate_SeqBAIJ"
2371 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2372 {
2373   PetscErrorCode ierr;
2374   PetscMPIInt    size;
2375   Mat_SeqBAIJ    *b;
2376 
2377   PetscFunctionBegin;
2378   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2379   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2380 
2381   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2382   B->data = (void*)b;
2383   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2384   B->factor           = 0;
2385   B->mapping          = 0;
2386   b->row              = 0;
2387   b->col              = 0;
2388   b->icol             = 0;
2389   b->reallocs         = 0;
2390   b->saved_values     = 0;
2391 #if defined(PETSC_USE_MAT_SINGLE)
2392   b->setvalueslen     = 0;
2393   b->setvaluescopy    = PETSC_NULL;
2394 #endif
2395 
2396   b->roworiented      = PETSC_TRUE;
2397   b->nonew            = 0;
2398   b->diag             = 0;
2399   b->solve_work       = 0;
2400   b->mult_work        = 0;
2401   B->spptr            = 0;
2402   B->info.nz_unneeded = (PetscReal)b->maxnz;
2403   b->keepzeroedrows   = PETSC_FALSE;
2404   b->xtoy              = 0;
2405   b->XtoY              = 0;
2406   b->compressedrow.use     = PETSC_FALSE;
2407   b->compressedrow.nrows   = 0;
2408   b->compressedrow.i       = PETSC_NULL;
2409   b->compressedrow.rindex  = PETSC_NULL;
2410   b->compressedrow.checked = PETSC_FALSE;
2411   B->same_nonzero          = PETSC_FALSE;
2412 
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2414                                      "MatInvertBlockDiagonal_SeqBAIJ",
2415                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2417                                      "MatStoreValues_SeqBAIJ",
2418                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2419   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2420                                      "MatRetrieveValues_SeqBAIJ",
2421                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2422   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2423                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2424                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2425   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2426                                      "MatConvert_SeqBAIJ_SeqAIJ",
2427                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2428   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2429                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2430                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2431   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2432                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2433                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2434   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 EXTERN_C_END
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2441 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2442 {
2443   Mat            C;
2444   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2445   PetscErrorCode ierr;
2446   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2447 
2448   PetscFunctionBegin;
2449   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2450 
2451   *B = 0;
2452   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2453   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2454   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2455   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2456   c    = (Mat_SeqBAIJ*)C->data;
2457 
2458   C->rmap.N   = A->rmap.N;
2459   C->cmap.N   = A->cmap.N;
2460   C->rmap.bs  = A->rmap.bs;
2461   c->bs2 = a->bs2;
2462   c->mbs = a->mbs;
2463   c->nbs = a->nbs;
2464 
2465   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2466   for (i=0; i<mbs; i++) {
2467     c->imax[i] = a->imax[i];
2468     c->ilen[i] = a->ilen[i];
2469   }
2470 
2471   /* allocate the matrix space */
2472   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2473   c->singlemalloc = PETSC_TRUE;
2474   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2475   if (mbs > 0) {
2476     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2477     if (cpvalues == MAT_COPY_VALUES) {
2478       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2479     } else {
2480       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2481     }
2482   }
2483   c->roworiented = a->roworiented;
2484   c->nonew       = a->nonew;
2485 
2486   if (a->diag) {
2487     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2488     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2489     for (i=0; i<mbs; i++) {
2490       c->diag[i] = a->diag[i];
2491     }
2492   } else c->diag        = 0;
2493   c->nz                 = a->nz;
2494   c->maxnz              = a->maxnz;
2495   c->solve_work         = 0;
2496   c->mult_work          = 0;
2497   c->free_a             = PETSC_TRUE;
2498   c->free_ij            = PETSC_TRUE;
2499   C->preallocated       = PETSC_TRUE;
2500   C->assembled          = PETSC_TRUE;
2501 
2502   c->compressedrow.use     = a->compressedrow.use;
2503   c->compressedrow.nrows   = a->compressedrow.nrows;
2504   c->compressedrow.checked = a->compressedrow.checked;
2505   if ( a->compressedrow.checked && a->compressedrow.use){
2506     i = a->compressedrow.nrows;
2507     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2508     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2509     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2510     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2511   } else {
2512     c->compressedrow.use    = PETSC_FALSE;
2513     c->compressedrow.i      = PETSC_NULL;
2514     c->compressedrow.rindex = PETSC_NULL;
2515   }
2516   C->same_nonzero = A->same_nonzero;
2517   *B = C;
2518   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatLoad_SeqBAIJ"
2524 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2525 {
2526   Mat_SeqBAIJ    *a;
2527   Mat            B;
2528   PetscErrorCode ierr;
2529   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2530   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2531   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2532   PetscInt       *masked,nmask,tmp,bs2,ishift;
2533   PetscMPIInt    size;
2534   int            fd;
2535   PetscScalar    *aa;
2536   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2537 
2538   PetscFunctionBegin;
2539   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2540     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2541   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2542   bs2  = bs*bs;
2543 
2544   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2545   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2546   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2547   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2548   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2549   M = header[1]; N = header[2]; nz = header[3];
2550 
2551   if (header[3] < 0) {
2552     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2553   }
2554 
2555   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2556 
2557   /*
2558      This code adds extra rows to make sure the number of rows is
2559     divisible by the blocksize
2560   */
2561   mbs        = M/bs;
2562   extra_rows = bs - M + bs*(mbs);
2563   if (extra_rows == bs) extra_rows = 0;
2564   else                  mbs++;
2565   if (extra_rows) {
2566     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2567   }
2568 
2569   /* read in row lengths */
2570   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2571   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2572   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2573 
2574   /* read in column indices */
2575   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2576   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2577   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2578 
2579   /* loop over row lengths determining block row lengths */
2580   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2581   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2582   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2583   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2584   masked   = mask + mbs;
2585   rowcount = 0; nzcount = 0;
2586   for (i=0; i<mbs; i++) {
2587     nmask = 0;
2588     for (j=0; j<bs; j++) {
2589       kmax = rowlengths[rowcount];
2590       for (k=0; k<kmax; k++) {
2591         tmp = jj[nzcount++]/bs;
2592         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2593       }
2594       rowcount++;
2595     }
2596     browlengths[i] += nmask;
2597     /* zero out the mask elements we set */
2598     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2599   }
2600 
2601   /* create our matrix */
2602   ierr = MatCreate(comm,&B);
2603   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2604   ierr = MatSetType(B,type);CHKERRQ(ierr);
2605   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2606   a = (Mat_SeqBAIJ*)B->data;
2607 
2608   /* set matrix "i" values */
2609   a->i[0] = 0;
2610   for (i=1; i<= mbs; i++) {
2611     a->i[i]      = a->i[i-1] + browlengths[i-1];
2612     a->ilen[i-1] = browlengths[i-1];
2613   }
2614   a->nz         = 0;
2615   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2616 
2617   /* read in nonzero values */
2618   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2619   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2620   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2621 
2622   /* set "a" and "j" values into matrix */
2623   nzcount = 0; jcount = 0;
2624   for (i=0; i<mbs; i++) {
2625     nzcountb = nzcount;
2626     nmask    = 0;
2627     for (j=0; j<bs; j++) {
2628       kmax = rowlengths[i*bs+j];
2629       for (k=0; k<kmax; k++) {
2630         tmp = jj[nzcount++]/bs;
2631 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2632       }
2633     }
2634     /* sort the masked values */
2635     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2636 
2637     /* set "j" values into matrix */
2638     maskcount = 1;
2639     for (j=0; j<nmask; j++) {
2640       a->j[jcount++]  = masked[j];
2641       mask[masked[j]] = maskcount++;
2642     }
2643     /* set "a" values into matrix */
2644     ishift = bs2*a->i[i];
2645     for (j=0; j<bs; j++) {
2646       kmax = rowlengths[i*bs+j];
2647       for (k=0; k<kmax; k++) {
2648         tmp       = jj[nzcountb]/bs ;
2649         block     = mask[tmp] - 1;
2650         point     = jj[nzcountb] - bs*tmp;
2651         idx       = ishift + bs2*block + j + bs*point;
2652         a->a[idx] = (MatScalar)aa[nzcountb++];
2653       }
2654     }
2655     /* zero out the mask elements we set */
2656     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2657   }
2658   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2659 
2660   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2661   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2662   ierr = PetscFree(aa);CHKERRQ(ierr);
2663   ierr = PetscFree(jj);CHKERRQ(ierr);
2664   ierr = PetscFree(mask);CHKERRQ(ierr);
2665 
2666   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2667   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2668   ierr = MatView_Private(B);CHKERRQ(ierr);
2669 
2670   *A = B;
2671   PetscFunctionReturn(0);
2672 }
2673 
2674 #undef __FUNCT__
2675 #define __FUNCT__ "MatCreateSeqBAIJ"
2676 /*@C
2677    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2678    compressed row) format.  For good matrix assembly performance the
2679    user should preallocate the matrix storage by setting the parameter nz
2680    (or the array nnz).  By setting these parameters accurately, performance
2681    during matrix assembly can be increased by more than a factor of 50.
2682 
2683    Collective on MPI_Comm
2684 
2685    Input Parameters:
2686 +  comm - MPI communicator, set to PETSC_COMM_SELF
2687 .  bs - size of block
2688 .  m - number of rows
2689 .  n - number of columns
2690 .  nz - number of nonzero blocks  per block row (same for all rows)
2691 -  nnz - array containing the number of nonzero blocks in the various block rows
2692          (possibly different for each block row) or PETSC_NULL
2693 
2694    Output Parameter:
2695 .  A - the matrix
2696 
2697    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2698    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2699    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2700    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2701 
2702    Options Database Keys:
2703 .   -mat_no_unroll - uses code that does not unroll the loops in the
2704                      block calculations (much slower)
2705 .    -mat_block_size - size of the blocks to use
2706 
2707    Level: intermediate
2708 
2709    Notes:
2710    The number of rows and columns must be divisible by blocksize.
2711 
2712    If the nnz parameter is given then the nz parameter is ignored
2713 
2714    A nonzero block is any block that as 1 or more nonzeros in it
2715 
2716    The block AIJ format is fully compatible with standard Fortran 77
2717    storage.  That is, the stored row and column indices can begin at
2718    either one (as in Fortran) or zero.  See the users' manual for details.
2719 
2720    Specify the preallocated storage with either nz or nnz (not both).
2721    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2722    allocation.  For additional details, see the users manual chapter on
2723    matrices.
2724 
2725 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2726 @*/
2727 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2728 {
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2733   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2734   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2735   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2741 /*@C
2742    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2743    per row in the matrix. For good matrix assembly performance the
2744    user should preallocate the matrix storage by setting the parameter nz
2745    (or the array nnz).  By setting these parameters accurately, performance
2746    during matrix assembly can be increased by more than a factor of 50.
2747 
2748    Collective on MPI_Comm
2749 
2750    Input Parameters:
2751 +  A - the matrix
2752 .  bs - size of block
2753 .  nz - number of block nonzeros per block row (same for all rows)
2754 -  nnz - array containing the number of block nonzeros in the various block rows
2755          (possibly different for each block row) or PETSC_NULL
2756 
2757    Options Database Keys:
2758 .   -mat_no_unroll - uses code that does not unroll the loops in the
2759                      block calculations (much slower)
2760 .    -mat_block_size - size of the blocks to use
2761 
2762    Level: intermediate
2763 
2764    Notes:
2765    If the nnz parameter is given then the nz parameter is ignored
2766 
2767    The block AIJ format is fully compatible with standard Fortran 77
2768    storage.  That is, the stored row and column indices can begin at
2769    either one (as in Fortran) or zero.  See the users' manual for details.
2770 
2771    Specify the preallocated storage with either nz or nnz (not both).
2772    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2773    allocation.  For additional details, see the users manual chapter on
2774    matrices.
2775 
2776 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2777 @*/
2778 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2779 {
2780   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2781 
2782   PetscFunctionBegin;
2783   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2784   if (f) {
2785     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2786   }
2787   PetscFunctionReturn(0);
2788 }
2789 
2790 #undef __FUNCT__
2791 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2792 /*@
2793      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2794               (upper triangular entries in CSR format) provided by the user.
2795 
2796      Collective on MPI_Comm
2797 
2798    Input Parameters:
2799 +  comm - must be an MPI communicator of size 1
2800 .  bs - size of block
2801 .  m - number of rows
2802 .  n - number of columns
2803 .  i - row indices
2804 .  j - column indices
2805 -  a - matrix values
2806 
2807    Output Parameter:
2808 .  mat - the matrix
2809 
2810    Level: intermediate
2811 
2812    Notes:
2813        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2814     once the matrix is destroyed
2815 
2816        You cannot set new nonzero locations into this matrix, that will generate an error.
2817 
2818        The i and j indices are 0 based
2819 
2820 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2821 
2822 @*/
2823 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2824 {
2825   PetscErrorCode ierr;
2826   PetscInt       ii;
2827   Mat_SeqBAIJ    *baij;
2828 
2829   PetscFunctionBegin;
2830   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2831   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2832 
2833   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2834   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2835   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2836   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2837   baij = (Mat_SeqBAIJ*)(*mat)->data;
2838   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2839 
2840   baij->i = i;
2841   baij->j = j;
2842   baij->a = a;
2843   baij->singlemalloc = PETSC_FALSE;
2844   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2845   baij->free_a       = PETSC_FALSE;
2846   baij->free_ij       = PETSC_FALSE;
2847 
2848   for (ii=0; ii<m; ii++) {
2849     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2850 #if defined(PETSC_USE_DEBUG)
2851     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2852 #endif
2853   }
2854 #if defined(PETSC_USE_DEBUG)
2855   for (ii=0; ii<baij->i[m]; ii++) {
2856     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2857     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2858   }
2859 #endif
2860 
2861   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2862   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2863   PetscFunctionReturn(0);
2864 }
2865