xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 906ed7cc33fecbafab01746eec64dcdcc8a4842f)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for direct calls from Fortran (Used in PETSc-fun3d)
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590   stepval = (n-1)*4;
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     rp   = aj + ai[row];
594     ap   = aa + 16*ai[row];
595     nrow = ailen[row];
596     low  = 0;
597     high = nrow;
598     for (l=0; l<n; l++) { /* loop over added columns */
599       col = in[l];
600       if (col <= lastcol) low = 0; else high = nrow;
601       lastcol = col;
602       value = v + k*(stepval+4 + l)*4;
603       while (high-low > 7) {
604         t = (low+high)/2;
605         if (rp[t] > col) high = t;
606         else             low  = t;
607       }
608       for (i=low; i<high; i++) {
609         if (rp[i] > col) break;
610         if (rp[i] == col) {
611           bap  = ap +  16*i;
612           for (ii=0; ii<4; ii++,value+=stepval) {
613             for (jj=ii; jj<16; jj+=4) {
614               bap[jj] += *value++;
615             }
616           }
617           goto noinsert2;
618         }
619       }
620       N = nrow++ - 1;
621       high++; /* added new column index thus must search to one higher than before */
622       /* shift up all the later entries in this row */
623       for (ii=N; ii>=i; ii--) {
624         rp[ii+1] = rp[ii];
625         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626       }
627       if (N >= i) {
628         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629       }
630       rp[i] = col;
631       bap   = ap +  16*i;
632       for (ii=0; ii<4; ii++,value+=stepval) {
633         for (jj=ii; jj<16; jj+=4) {
634           bap[jj] = *value++;
635         }
636       }
637       noinsert2:;
638       low = i;
639     }
640     ailen[row] = nrow;
641   }
642   PetscFunctionReturnVoid();
643 }
644 EXTERN_C_END
645 
646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
647 #define matsetvalues4_ MATSETVALUES4
648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649 #define matsetvalues4_ matsetvalues4
650 #endif
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatSetValues4_"
655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656 {
657   Mat         A = *AA;
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660   PetscInt    *ai=a->i,*ailen=a->ilen;
661   PetscInt    *aj=a->j,brow,bcol;
662   PetscInt    ridx,cidx,lastcol = -1;
663   MatScalar   *ap,value,*aa=a->a,*bap;
664 
665   PetscFunctionBegin;
666   for (k=0; k<m; k++) { /* loop over added rows */
667     row  = im[k]; brow = row/4;
668     rp   = aj + ai[brow];
669     ap   = aa + 16*ai[brow];
670     nrow = ailen[brow];
671     low  = 0;
672     high = nrow;
673     for (l=0; l<n; l++) { /* loop over added columns */
674       col = in[l]; bcol = col/4;
675       ridx = row % 4; cidx = col % 4;
676       value = v[l + k*n];
677       if (col <= lastcol) low = 0; else high = nrow;
678       lastcol = col;
679       while (high-low > 7) {
680         t = (low+high)/2;
681         if (rp[t] > bcol) high = t;
682         else              low  = t;
683       }
684       for (i=low; i<high; i++) {
685         if (rp[i] > bcol) break;
686         if (rp[i] == bcol) {
687           bap  = ap +  16*i + 4*cidx + ridx;
688           *bap += value;
689           goto noinsert1;
690         }
691       }
692       N = nrow++ - 1;
693       high++; /* added new column thus must search to one higher than before */
694       /* shift up all the later entries in this row */
695       for (ii=N; ii>=i; ii--) {
696         rp[ii+1] = rp[ii];
697         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698       }
699       if (N>=i) {
700         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701       }
702       rp[i]                    = bcol;
703       ap[16*i + 4*cidx + ridx] = value;
704       noinsert1:;
705       low = i;
706     }
707     ailen[brow] = nrow;
708   }
709   PetscFunctionReturnVoid();
710 }
711 EXTERN_C_END
712 
713 /*  UGLY, ugly, ugly
714    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717    converts the entries into single precision and then calls ..._MatScalar() to put them
718    into the single precision data structures.
719 */
720 #if defined(PETSC_USE_MAT_SINGLE)
721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722 #else
723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724 #endif
725 
726 #define CHUNKSIZE  10
727 
728 /*
729      Checks for missing diagonals
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734 {
735   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
736   PetscErrorCode ierr;
737   PetscInt       *diag,*jj = a->j,i;
738 
739   PetscFunctionBegin;
740   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
741   diag = a->diag;
742   for (i=0; i<a->mbs; i++) {
743     if (jj[diag[i]] != i) {
744       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745     }
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753 {
754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
755   PetscErrorCode ierr;
756   PetscInt       i,j,m = a->mbs;
757 
758   PetscFunctionBegin;
759   if (!a->diag) {
760     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
761   }
762   for (i=0; i<m; i++) {
763     a->diag[i] = a->i[i+1];
764     for (j=a->i[i]; j<a->i[i+1]; j++) {
765       if (a->j[j] == i) {
766         a->diag[i] = j;
767         break;
768       }
769     }
770   }
771   PetscFunctionReturn(0);
772 }
773 
774 
775 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
776 
777 #undef __FUNCT__
778 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
779 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
780 {
781   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
782   PetscErrorCode ierr;
783   PetscInt       n = a->mbs,i;
784 
785   PetscFunctionBegin;
786   *nn = n;
787   if (!ia) PetscFunctionReturn(0);
788   if (symmetric) {
789     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
790   } else if (oshift == 1) {
791     /* temporarily add 1 to i and j indices */
792     PetscInt nz = a->i[n];
793     for (i=0; i<nz; i++) a->j[i]++;
794     for (i=0; i<n+1; i++) a->i[i]++;
795     *ia = a->i; *ja = a->j;
796   } else {
797     *ia = a->i; *ja = a->j;
798   }
799 
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
805 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
806 {
807   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
808   PetscErrorCode ierr;
809   PetscInt       i,n = a->mbs;
810 
811   PetscFunctionBegin;
812   if (!ia) PetscFunctionReturn(0);
813   if (symmetric) {
814     ierr = PetscFree(*ia);CHKERRQ(ierr);
815     ierr = PetscFree(*ja);CHKERRQ(ierr);
816   } else if (oshift == 1) {
817     PetscInt nz = a->i[n]-1;
818     for (i=0; i<nz; i++) a->j[i]--;
819     for (i=0; i<n+1; i++) a->i[i]--;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_SeqBAIJ"
826 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
834 #endif
835   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
836   if (a->row) {
837     ierr = ISDestroy(a->row);CHKERRQ(ierr);
838   }
839   if (a->col) {
840     ierr = ISDestroy(a->col);CHKERRQ(ierr);
841   }
842   ierr = PetscFree(a->diag);CHKERRQ(ierr);
843   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
844   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
845   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
846   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
847   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
848   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
849 #if defined(PETSC_USE_MAT_SINGLE)
850   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
851 #endif
852   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
853   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
854 
855   ierr = PetscFree(a);CHKERRQ(ierr);
856 
857   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatSetOption_SeqBAIJ"
869 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
870 {
871   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
872   PetscErrorCode ierr;
873 
874   PetscFunctionBegin;
875   switch (op) {
876   case MAT_ROW_ORIENTED:
877     a->roworiented    = PETSC_TRUE;
878     break;
879   case MAT_COLUMN_ORIENTED:
880     a->roworiented    = PETSC_FALSE;
881     break;
882   case MAT_COLUMNS_SORTED:
883     a->sorted         = PETSC_TRUE;
884     break;
885   case MAT_COLUMNS_UNSORTED:
886     a->sorted         = PETSC_FALSE;
887     break;
888   case MAT_KEEP_ZEROED_ROWS:
889     a->keepzeroedrows = PETSC_TRUE;
890     break;
891   case MAT_NO_NEW_NONZERO_LOCATIONS:
892     a->nonew          = 1;
893     break;
894   case MAT_NEW_NONZERO_LOCATION_ERR:
895     a->nonew          = -1;
896     break;
897   case MAT_NEW_NONZERO_ALLOCATION_ERR:
898     a->nonew          = -2;
899     break;
900   case MAT_YES_NEW_NONZERO_LOCATIONS:
901     a->nonew          = 0;
902     break;
903   case MAT_ROWS_SORTED:
904   case MAT_ROWS_UNSORTED:
905   case MAT_YES_NEW_DIAGONALS:
906   case MAT_IGNORE_OFF_PROC_ENTRIES:
907   case MAT_USE_HASH_TABLE:
908     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
909     break;
910   case MAT_NO_NEW_DIAGONALS:
911     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
912   case MAT_SYMMETRIC:
913   case MAT_STRUCTURALLY_SYMMETRIC:
914   case MAT_NOT_SYMMETRIC:
915   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
916   case MAT_HERMITIAN:
917   case MAT_NOT_HERMITIAN:
918   case MAT_SYMMETRY_ETERNAL:
919   case MAT_NOT_SYMMETRY_ETERNAL:
920     break;
921   default:
922     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
923   }
924   PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatGetRow_SeqBAIJ"
929 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
930 {
931   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
932   PetscErrorCode ierr;
933   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
934   MatScalar      *aa,*aa_i;
935   PetscScalar    *v_i;
936 
937   PetscFunctionBegin;
938   bs  = A->rmap.bs;
939   ai  = a->i;
940   aj  = a->j;
941   aa  = a->a;
942   bs2 = a->bs2;
943 
944   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
945 
946   bn  = row/bs;   /* Block number */
947   bp  = row % bs; /* Block Position */
948   M   = ai[bn+1] - ai[bn];
949   *nz = bs*M;
950 
951   if (v) {
952     *v = 0;
953     if (*nz) {
954       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
955       for (i=0; i<M; i++) { /* for each block in the block row */
956         v_i  = *v + i*bs;
957         aa_i = aa + bs2*(ai[bn] + i);
958         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
959       }
960     }
961   }
962 
963   if (idx) {
964     *idx = 0;
965     if (*nz) {
966       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
967       for (i=0; i<M; i++) { /* for each block in the block row */
968         idx_i = *idx + i*bs;
969         itmp  = bs*aj[ai[bn] + i];
970         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
971       }
972     }
973   }
974   PetscFunctionReturn(0);
975 }
976 
977 #undef __FUNCT__
978 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
979 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
980 {
981   PetscErrorCode ierr;
982 
983   PetscFunctionBegin;
984   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
985   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatTranspose_SeqBAIJ"
991 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
992 {
993   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
994   Mat            C;
995   PetscErrorCode ierr;
996   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
997   PetscInt       *rows,*cols,bs2=a->bs2;
998   PetscScalar    *array;
999 
1000   PetscFunctionBegin;
1001   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1002   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1003   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1004 
1005 #if defined(PETSC_USE_MAT_SINGLE)
1006   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1007   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1008 #else
1009   array = a->a;
1010 #endif
1011 
1012   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1013   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1014   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1015   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1016   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1017   ierr = PetscFree(col);CHKERRQ(ierr);
1018   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1019   cols = rows + bs;
1020   for (i=0; i<mbs; i++) {
1021     cols[0] = i*bs;
1022     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1023     len = ai[i+1] - ai[i];
1024     for (j=0; j<len; j++) {
1025       rows[0] = (*aj++)*bs;
1026       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1027       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1028       array += bs2;
1029     }
1030   }
1031   ierr = PetscFree(rows);CHKERRQ(ierr);
1032 #if defined(PETSC_USE_MAT_SINGLE)
1033   ierr = PetscFree(array);
1034 #endif
1035 
1036   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1037   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038 
1039   if (B) {
1040     *B = C;
1041   } else {
1042     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1043   }
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1049 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1050 {
1051   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1052   PetscErrorCode ierr;
1053   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1054   int            fd;
1055   PetscScalar    *aa;
1056   FILE           *file;
1057 
1058   PetscFunctionBegin;
1059   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1060   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1061   col_lens[0] = MAT_FILE_COOKIE;
1062 
1063   col_lens[1] = A->rmap.N;
1064   col_lens[2] = A->cmap.n;
1065   col_lens[3] = a->nz*bs2;
1066 
1067   /* store lengths of each row and write (including header) to file */
1068   count = 0;
1069   for (i=0; i<a->mbs; i++) {
1070     for (j=0; j<bs; j++) {
1071       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1072     }
1073   }
1074   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1075   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1076 
1077   /* store column indices (zero start index) */
1078   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1079   count = 0;
1080   for (i=0; i<a->mbs; i++) {
1081     for (j=0; j<bs; j++) {
1082       for (k=a->i[i]; k<a->i[i+1]; k++) {
1083         for (l=0; l<bs; l++) {
1084           jj[count++] = bs*a->j[k] + l;
1085         }
1086       }
1087     }
1088   }
1089   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1090   ierr = PetscFree(jj);CHKERRQ(ierr);
1091 
1092   /* store nonzero values */
1093   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1094   count = 0;
1095   for (i=0; i<a->mbs; i++) {
1096     for (j=0; j<bs; j++) {
1097       for (k=a->i[i]; k<a->i[i+1]; k++) {
1098         for (l=0; l<bs; l++) {
1099           aa[count++] = a->a[bs2*k + l*bs + j];
1100         }
1101       }
1102     }
1103   }
1104   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1105   ierr = PetscFree(aa);CHKERRQ(ierr);
1106 
1107   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1108   if (file) {
1109     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1116 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1117 {
1118   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode    ierr;
1120   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1121   PetscViewerFormat format;
1122 
1123   PetscFunctionBegin;
1124   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1125   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1126     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1127   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1128     Mat aij;
1129     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1130     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1131     ierr = MatDestroy(aij);CHKERRQ(ierr);
1132   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1133      PetscFunctionReturn(0);
1134   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1135     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1136     for (i=0; i<a->mbs; i++) {
1137       for (j=0; j<bs; j++) {
1138         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1139         for (k=a->i[i]; k<a->i[i+1]; k++) {
1140           for (l=0; l<bs; l++) {
1141 #if defined(PETSC_USE_COMPLEX)
1142             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1143               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1144                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1145             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1147                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             }
1151 #else
1152             if (a->a[bs2*k + l*bs + j] != 0.0) {
1153               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1154             }
1155 #endif
1156           }
1157         }
1158         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1159       }
1160     }
1161     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1162   } else {
1163     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1164     for (i=0; i<a->mbs; i++) {
1165       for (j=0; j<bs; j++) {
1166         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1167         for (k=a->i[i]; k<a->i[i+1]; k++) {
1168           for (l=0; l<bs; l++) {
1169 #if defined(PETSC_USE_COMPLEX)
1170             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1171               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1172                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1173             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1174               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1175                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1176             } else {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1178             }
1179 #else
1180             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1181 #endif
1182           }
1183         }
1184         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1185       }
1186     }
1187     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1188   }
1189   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1190   PetscFunctionReturn(0);
1191 }
1192 
1193 #undef __FUNCT__
1194 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1195 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1196 {
1197   Mat            A = (Mat) Aa;
1198   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1199   PetscErrorCode ierr;
1200   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1201   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1202   MatScalar      *aa;
1203   PetscViewer    viewer;
1204 
1205   PetscFunctionBegin;
1206 
1207   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1208   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1209 
1210   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1211 
1212   /* loop over matrix elements drawing boxes */
1213   color = PETSC_DRAW_BLUE;
1214   for (i=0,row=0; i<mbs; i++,row+=bs) {
1215     for (j=a->i[i]; j<a->i[i+1]; j++) {
1216       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1217       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1218       aa = a->a + j*bs2;
1219       for (k=0; k<bs; k++) {
1220         for (l=0; l<bs; l++) {
1221           if (PetscRealPart(*aa++) >=  0.) continue;
1222           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1223         }
1224       }
1225     }
1226   }
1227   color = PETSC_DRAW_CYAN;
1228   for (i=0,row=0; i<mbs; i++,row+=bs) {
1229     for (j=a->i[i]; j<a->i[i+1]; j++) {
1230       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1231       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1232       aa = a->a + j*bs2;
1233       for (k=0; k<bs; k++) {
1234         for (l=0; l<bs; l++) {
1235           if (PetscRealPart(*aa++) != 0.) continue;
1236           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1237         }
1238       }
1239     }
1240   }
1241 
1242   color = PETSC_DRAW_RED;
1243   for (i=0,row=0; i<mbs; i++,row+=bs) {
1244     for (j=a->i[i]; j<a->i[i+1]; j++) {
1245       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1246       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1247       aa = a->a + j*bs2;
1248       for (k=0; k<bs; k++) {
1249         for (l=0; l<bs; l++) {
1250           if (PetscRealPart(*aa++) <= 0.) continue;
1251           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1252         }
1253       }
1254     }
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1261 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1262 {
1263   PetscErrorCode ierr;
1264   PetscReal      xl,yl,xr,yr,w,h;
1265   PetscDraw      draw;
1266   PetscTruth     isnull;
1267 
1268   PetscFunctionBegin;
1269 
1270   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1271   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1272 
1273   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1274   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1275   xr += w;    yr += h;  xl = -w;     yl = -h;
1276   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1277   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1278   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 #undef __FUNCT__
1283 #define __FUNCT__ "MatView_SeqBAIJ"
1284 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1285 {
1286   PetscErrorCode ierr;
1287   PetscTruth     iascii,isbinary,isdraw;
1288 
1289   PetscFunctionBegin;
1290   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1293   if (iascii){
1294     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1295   } else if (isbinary) {
1296     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1297   } else if (isdraw) {
1298     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1299   } else {
1300     Mat B;
1301     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1302     ierr = MatView(B,viewer);CHKERRQ(ierr);
1303     ierr = MatDestroy(B);CHKERRQ(ierr);
1304   }
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1311 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1312 {
1313   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1314   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1315   PetscInt    *ai = a->i,*ailen = a->ilen;
1316   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1317   MatScalar   *ap,*aa = a->a,zero = 0.0;
1318 
1319   PetscFunctionBegin;
1320   for (k=0; k<m; k++) { /* loop over rows */
1321     row  = im[k]; brow = row/bs;
1322     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1323     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1324     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1325     nrow = ailen[brow];
1326     for (l=0; l<n; l++) { /* loop over columns */
1327       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1328       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1329       col  = in[l] ;
1330       bcol = col/bs;
1331       cidx = col%bs;
1332       ridx = row%bs;
1333       high = nrow;
1334       low  = 0; /* assume unsorted */
1335       while (high-low > 5) {
1336         t = (low+high)/2;
1337         if (rp[t] > bcol) high = t;
1338         else             low  = t;
1339       }
1340       for (i=low; i<high; i++) {
1341         if (rp[i] > bcol) break;
1342         if (rp[i] == bcol) {
1343           *v++ = ap[bs2*i+bs*cidx+ridx];
1344           goto finished;
1345         }
1346       }
1347       *v++ = zero;
1348       finished:;
1349     }
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #if defined(PETSC_USE_MAT_SINGLE)
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1357 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1358 {
1359   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1360   PetscErrorCode ierr;
1361   PetscInt       i,N = m*n*b->bs2;
1362   MatScalar      *vsingle;
1363 
1364   PetscFunctionBegin;
1365   if (N > b->setvalueslen) {
1366     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1367     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1368     b->setvalueslen  = N;
1369   }
1370   vsingle = b->setvaluescopy;
1371   for (i=0; i<N; i++) {
1372     vsingle[i] = v[i];
1373   }
1374   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 #endif
1378 
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1382 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1383 {
1384   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1385   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1386   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1387   PetscErrorCode  ierr;
1388   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1389   PetscTruth      roworiented=a->roworiented;
1390   const MatScalar *value = v;
1391   MatScalar       *ap,*aa = a->a,*bap;
1392 
1393   PetscFunctionBegin;
1394   if (roworiented) {
1395     stepval = (n-1)*bs;
1396   } else {
1397     stepval = (m-1)*bs;
1398   }
1399   for (k=0; k<m; k++) { /* loop over added rows */
1400     row  = im[k];
1401     if (row < 0) continue;
1402 #if defined(PETSC_USE_DEBUG)
1403     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1404 #endif
1405     rp   = aj + ai[row];
1406     ap   = aa + bs2*ai[row];
1407     rmax = imax[row];
1408     nrow = ailen[row];
1409     low  = 0;
1410     high = nrow;
1411     for (l=0; l<n; l++) { /* loop over added columns */
1412       if (in[l] < 0) continue;
1413 #if defined(PETSC_USE_DEBUG)
1414       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1415 #endif
1416       col = in[l];
1417       if (roworiented) {
1418         value = v + k*(stepval+bs)*bs + l*bs;
1419       } else {
1420         value = v + l*(stepval+bs)*bs + k*bs;
1421       }
1422       if (col <= lastcol) low = 0; else high = nrow;
1423       lastcol = col;
1424       while (high-low > 7) {
1425         t = (low+high)/2;
1426         if (rp[t] > col) high = t;
1427         else             low  = t;
1428       }
1429       for (i=low; i<high; i++) {
1430         if (rp[i] > col) break;
1431         if (rp[i] == col) {
1432           bap  = ap +  bs2*i;
1433           if (roworiented) {
1434             if (is == ADD_VALUES) {
1435               for (ii=0; ii<bs; ii++,value+=stepval) {
1436                 for (jj=ii; jj<bs2; jj+=bs) {
1437                   bap[jj] += *value++;
1438                 }
1439               }
1440             } else {
1441               for (ii=0; ii<bs; ii++,value+=stepval) {
1442                 for (jj=ii; jj<bs2; jj+=bs) {
1443                   bap[jj] = *value++;
1444                 }
1445               }
1446             }
1447           } else {
1448             if (is == ADD_VALUES) {
1449               for (ii=0; ii<bs; ii++,value+=stepval) {
1450                 for (jj=0; jj<bs; jj++) {
1451                   *bap++ += *value++;
1452                 }
1453               }
1454             } else {
1455               for (ii=0; ii<bs; ii++,value+=stepval) {
1456                 for (jj=0; jj<bs; jj++) {
1457                   *bap++  = *value++;
1458                 }
1459               }
1460             }
1461           }
1462           goto noinsert2;
1463         }
1464       }
1465       if (nonew == 1) goto noinsert2;
1466       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1467       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew);
1468       N = nrow++ - 1; high++;
1469       /* shift up all the later entries in this row */
1470       for (ii=N; ii>=i; ii--) {
1471         rp[ii+1] = rp[ii];
1472         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1473       }
1474       if (N >= i) {
1475         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1476       }
1477       rp[i] = col;
1478       bap   = ap +  bs2*i;
1479       if (roworiented) {
1480         for (ii=0; ii<bs; ii++,value+=stepval) {
1481           for (jj=ii; jj<bs2; jj+=bs) {
1482             bap[jj] = *value++;
1483           }
1484         }
1485       } else {
1486         for (ii=0; ii<bs; ii++,value+=stepval) {
1487           for (jj=0; jj<bs; jj++) {
1488             *bap++  = *value++;
1489           }
1490         }
1491       }
1492       noinsert2:;
1493       low = i;
1494     }
1495     ailen[row] = nrow;
1496   }
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 #undef __FUNCT__
1501 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1502 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1503 {
1504   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1505   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1506   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1507   PetscErrorCode ierr;
1508   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1509   MatScalar      *aa = a->a,*ap;
1510   PetscReal      ratio=0.6;
1511 
1512   PetscFunctionBegin;
1513   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1514 
1515   if (m) rmax = ailen[0];
1516   for (i=1; i<mbs; i++) {
1517     /* move each row back by the amount of empty slots (fshift) before it*/
1518     fshift += imax[i-1] - ailen[i-1];
1519     rmax   = PetscMax(rmax,ailen[i]);
1520     if (fshift) {
1521       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1522       N = ailen[i];
1523       for (j=0; j<N; j++) {
1524         ip[j-fshift] = ip[j];
1525         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1526       }
1527     }
1528     ai[i] = ai[i-1] + ailen[i-1];
1529   }
1530   if (mbs) {
1531     fshift += imax[mbs-1] - ailen[mbs-1];
1532     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1533   }
1534   /* reset ilen and imax for each row */
1535   for (i=0; i<mbs; i++) {
1536     ailen[i] = imax[i] = ai[i+1] - ai[i];
1537   }
1538   a->nz = ai[mbs];
1539 
1540   /* diagonals may have moved, so kill the diagonal pointers */
1541   a->idiagvalid = PETSC_FALSE;
1542   if (fshift && a->diag) {
1543     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1544     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1545     a->diag = 0;
1546   }
1547   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1548   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1549   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1550   a->reallocs          = 0;
1551   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1552 
1553   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1554   if (a->compressedrow.use){
1555     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1556   }
1557 
1558   A->same_nonzero = PETSC_TRUE;
1559   PetscFunctionReturn(0);
1560 }
1561 
1562 /*
1563    This function returns an array of flags which indicate the locations of contiguous
1564    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1565    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1566    Assume: sizes should be long enough to hold all the values.
1567 */
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1570 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1571 {
1572   PetscInt   i,j,k,row;
1573   PetscTruth flg;
1574 
1575   PetscFunctionBegin;
1576   for (i=0,j=0; i<n; j++) {
1577     row = idx[i];
1578     if (row%bs!=0) { /* Not the begining of a block */
1579       sizes[j] = 1;
1580       i++;
1581     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1582       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1583       i++;
1584     } else { /* Begining of the block, so check if the complete block exists */
1585       flg = PETSC_TRUE;
1586       for (k=1; k<bs; k++) {
1587         if (row+k != idx[i+k]) { /* break in the block */
1588           flg = PETSC_FALSE;
1589           break;
1590         }
1591       }
1592       if (flg) { /* No break in the bs */
1593         sizes[j] = bs;
1594         i+= bs;
1595       } else {
1596         sizes[j] = 1;
1597         i++;
1598       }
1599     }
1600   }
1601   *bs_max = j;
1602   PetscFunctionReturn(0);
1603 }
1604 
1605 #undef __FUNCT__
1606 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1607 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1608 {
1609   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1610   PetscErrorCode ierr;
1611   PetscInt       i,j,k,count,*rows;
1612   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1613   PetscScalar    zero = 0.0;
1614   MatScalar      *aa;
1615 
1616   PetscFunctionBegin;
1617   /* Make a copy of the IS and  sort it */
1618   /* allocate memory for rows,sizes */
1619   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1620   sizes = rows + is_n;
1621 
1622   /* copy IS values to rows, and sort them */
1623   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1624   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1625   if (baij->keepzeroedrows) {
1626     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1627     bs_max = is_n;
1628     A->same_nonzero = PETSC_TRUE;
1629   } else {
1630     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1631     A->same_nonzero = PETSC_FALSE;
1632   }
1633 
1634   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1635     row   = rows[j];
1636     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1637     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1638     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1639     if (sizes[i] == bs && !baij->keepzeroedrows) {
1640       if (diag != 0.0) {
1641         if (baij->ilen[row/bs] > 0) {
1642           baij->ilen[row/bs]       = 1;
1643           baij->j[baij->i[row/bs]] = row/bs;
1644           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1645         }
1646         /* Now insert all the diagonal values for this bs */
1647         for (k=0; k<bs; k++) {
1648           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1649         }
1650       } else { /* (diag == 0.0) */
1651         baij->ilen[row/bs] = 0;
1652       } /* end (diag == 0.0) */
1653     } else { /* (sizes[i] != bs) */
1654 #if defined (PETSC_USE_DEBUG)
1655       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1656 #endif
1657       for (k=0; k<count; k++) {
1658         aa[0] =  zero;
1659         aa    += bs;
1660       }
1661       if (diag != 0.0) {
1662         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1663       }
1664     }
1665   }
1666 
1667   ierr = PetscFree(rows);CHKERRQ(ierr);
1668   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1669   PetscFunctionReturn(0);
1670 }
1671 
1672 #undef __FUNCT__
1673 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1674 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1675 {
1676   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1677   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1678   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1679   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1680   PetscErrorCode ierr;
1681   PetscInt       ridx,cidx,bs2=a->bs2;
1682   PetscTruth     roworiented=a->roworiented;
1683   MatScalar      *ap,value,*aa=a->a,*bap;
1684 
1685   PetscFunctionBegin;
1686   for (k=0; k<m; k++) { /* loop over added rows */
1687     row  = im[k];
1688     brow = row/bs;
1689     if (row < 0) continue;
1690 #if defined(PETSC_USE_DEBUG)
1691     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1692 #endif
1693     rp   = aj + ai[brow];
1694     ap   = aa + bs2*ai[brow];
1695     rmax = imax[brow];
1696     nrow = ailen[brow];
1697     low  = 0;
1698     high = nrow;
1699     for (l=0; l<n; l++) { /* loop over added columns */
1700       if (in[l] < 0) continue;
1701 #if defined(PETSC_USE_DEBUG)
1702       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1703 #endif
1704       col = in[l]; bcol = col/bs;
1705       ridx = row % bs; cidx = col % bs;
1706       if (roworiented) {
1707         value = v[l + k*n];
1708       } else {
1709         value = v[k + l*m];
1710       }
1711       if (col <= lastcol) low = 0; else high = nrow;
1712       lastcol = col;
1713       while (high-low > 7) {
1714         t = (low+high)/2;
1715         if (rp[t] > bcol) high = t;
1716         else              low  = t;
1717       }
1718       for (i=low; i<high; i++) {
1719         if (rp[i] > bcol) break;
1720         if (rp[i] == bcol) {
1721           bap  = ap +  bs2*i + bs*cidx + ridx;
1722           if (is == ADD_VALUES) *bap += value;
1723           else                  *bap  = value;
1724           goto noinsert1;
1725         }
1726       }
1727       if (nonew == 1) goto noinsert1;
1728       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1729       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew);
1730       N = nrow++ - 1; high++;
1731       /* shift up all the later entries in this row */
1732       for (ii=N; ii>=i; ii--) {
1733         rp[ii+1] = rp[ii];
1734         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1735       }
1736       if (N>=i) {
1737         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1738       }
1739       rp[i]                      = bcol;
1740       ap[bs2*i + bs*cidx + ridx] = value;
1741       a->nz++;
1742       noinsert1:;
1743       low = i;
1744     }
1745     ailen[brow] = nrow;
1746   }
1747   A->same_nonzero = PETSC_FALSE;
1748   PetscFunctionReturn(0);
1749 }
1750 
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1754 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1755 {
1756   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1757   Mat            outA;
1758   PetscErrorCode ierr;
1759   PetscTruth     row_identity,col_identity;
1760 
1761   PetscFunctionBegin;
1762   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1763   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1764   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1765   if (!row_identity || !col_identity) {
1766     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1767   }
1768 
1769   outA          = inA;
1770   inA->factor   = FACTOR_LU;
1771 
1772   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1773 
1774   a->row        = row;
1775   a->col        = col;
1776   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1777   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1778 
1779   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1780   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1781   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1782 
1783   /*
1784       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1785       for ILU(0) factorization with natural ordering
1786   */
1787   if (inA->rmap.bs < 8) {
1788     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1789   } else {
1790     if (!a->solve_work) {
1791       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1792       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1793     }
1794   }
1795 
1796   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1797 
1798   PetscFunctionReturn(0);
1799 }
1800 
1801 EXTERN_C_BEGIN
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1804 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1805 {
1806   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1807   PetscInt    i,nz,nbs;
1808 
1809   PetscFunctionBegin;
1810   nz  = baij->maxnz/baij->bs2;
1811   nbs = baij->nbs;
1812   for (i=0; i<nz; i++) {
1813     baij->j[i] = indices[i];
1814   }
1815   baij->nz = nz;
1816   for (i=0; i<nbs; i++) {
1817     baij->ilen[i] = baij->imax[i];
1818   }
1819 
1820   PetscFunctionReturn(0);
1821 }
1822 EXTERN_C_END
1823 
1824 #undef __FUNCT__
1825 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1826 /*@
1827     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1828        in the matrix.
1829 
1830   Input Parameters:
1831 +  mat - the SeqBAIJ matrix
1832 -  indices - the column indices
1833 
1834   Level: advanced
1835 
1836   Notes:
1837     This can be called if you have precomputed the nonzero structure of the
1838   matrix and want to provide it to the matrix object to improve the performance
1839   of the MatSetValues() operation.
1840 
1841     You MUST have set the correct numbers of nonzeros per row in the call to
1842   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1843 
1844     MUST be called before any calls to MatSetValues();
1845 
1846 @*/
1847 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1848 {
1849   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1850 
1851   PetscFunctionBegin;
1852   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1853   PetscValidPointer(indices,2);
1854   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1855   if (f) {
1856     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1857   } else {
1858     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1865 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1866 {
1867   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1868   PetscErrorCode ierr;
1869   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1870   PetscReal      atmp;
1871   PetscScalar    *x,zero = 0.0;
1872   MatScalar      *aa;
1873   PetscInt       ncols,brow,krow,kcol;
1874 
1875   PetscFunctionBegin;
1876   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1877   bs   = A->rmap.bs;
1878   aa   = a->a;
1879   ai   = a->i;
1880   aj   = a->j;
1881   mbs = a->mbs;
1882 
1883   ierr = VecSet(v,zero);CHKERRQ(ierr);
1884   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1885   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1886   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1887   for (i=0; i<mbs; i++) {
1888     ncols = ai[1] - ai[0]; ai++;
1889     brow  = bs*i;
1890     for (j=0; j<ncols; j++){
1891       /* bcol = bs*(*aj); */
1892       for (kcol=0; kcol<bs; kcol++){
1893         for (krow=0; krow<bs; krow++){
1894           atmp = PetscAbsScalar(*aa); aa++;
1895           row = brow + krow;    /* row index */
1896           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1897           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1898         }
1899       }
1900       aj++;
1901     }
1902   }
1903   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatCopy_SeqBAIJ"
1909 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1910 {
1911   PetscErrorCode ierr;
1912 
1913   PetscFunctionBegin;
1914   /* If the two matrices have the same copy implementation, use fast copy. */
1915   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1916     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1917     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1918 
1919     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1920       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1921     }
1922     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1923   } else {
1924     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1925   }
1926   PetscFunctionReturn(0);
1927 }
1928 
1929 #undef __FUNCT__
1930 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1931 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1932 {
1933   PetscErrorCode ierr;
1934 
1935   PetscFunctionBegin;
1936   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1942 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1943 {
1944   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1945   PetscFunctionBegin;
1946   *array = a->a;
1947   PetscFunctionReturn(0);
1948 }
1949 
1950 #undef __FUNCT__
1951 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1952 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1953 {
1954   PetscFunctionBegin;
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #include "petscblaslapack.h"
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1961 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1962 {
1963   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1964   PetscErrorCode ierr;
1965   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1966   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1967 
1968   PetscFunctionBegin;
1969   if (str == SAME_NONZERO_PATTERN) {
1970     PetscScalar alpha = a;
1971     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1972   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1973     if (y->xtoy && y->XtoY != X) {
1974       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1975       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1976     }
1977     if (!y->xtoy) { /* get xtoy */
1978       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1979       y->XtoY = X;
1980     }
1981     bs2 = bs*bs;
1982     for (i=0; i<x->nz; i++) {
1983       j = 0;
1984       while (j < bs2){
1985         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1986         j++;
1987       }
1988     }
1989     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1990   } else {
1991     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1992   }
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 #undef __FUNCT__
1997 #define __FUNCT__ "MatRealPart_SeqBAIJ"
1998 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
1999 {
2000   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2001   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2002   PetscScalar    *aa = a->a;
2003 
2004   PetscFunctionBegin;
2005   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 #undef __FUNCT__
2010 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2011 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2012 {
2013   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2014   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2015   PetscScalar    *aa = a->a;
2016 
2017   PetscFunctionBegin;
2018   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2019   PetscFunctionReturn(0);
2020 }
2021 
2022 
2023 /* -------------------------------------------------------------------*/
2024 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2025        MatGetRow_SeqBAIJ,
2026        MatRestoreRow_SeqBAIJ,
2027        MatMult_SeqBAIJ_N,
2028 /* 4*/ MatMultAdd_SeqBAIJ_N,
2029        MatMultTranspose_SeqBAIJ,
2030        MatMultTransposeAdd_SeqBAIJ,
2031        MatSolve_SeqBAIJ_N,
2032        0,
2033        0,
2034 /*10*/ 0,
2035        MatLUFactor_SeqBAIJ,
2036        0,
2037        0,
2038        MatTranspose_SeqBAIJ,
2039 /*15*/ MatGetInfo_SeqBAIJ,
2040        MatEqual_SeqBAIJ,
2041        MatGetDiagonal_SeqBAIJ,
2042        MatDiagonalScale_SeqBAIJ,
2043        MatNorm_SeqBAIJ,
2044 /*20*/ 0,
2045        MatAssemblyEnd_SeqBAIJ,
2046        0,
2047        MatSetOption_SeqBAIJ,
2048        MatZeroEntries_SeqBAIJ,
2049 /*25*/ MatZeroRows_SeqBAIJ,
2050        MatLUFactorSymbolic_SeqBAIJ,
2051        MatLUFactorNumeric_SeqBAIJ_N,
2052        MatCholeskyFactorSymbolic_SeqBAIJ,
2053        MatCholeskyFactorNumeric_SeqBAIJ_N,
2054 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2055        MatILUFactorSymbolic_SeqBAIJ,
2056        MatICCFactorSymbolic_SeqBAIJ,
2057        MatGetArray_SeqBAIJ,
2058        MatRestoreArray_SeqBAIJ,
2059 /*35*/ MatDuplicate_SeqBAIJ,
2060        0,
2061        0,
2062        MatILUFactor_SeqBAIJ,
2063        0,
2064 /*40*/ MatAXPY_SeqBAIJ,
2065        MatGetSubMatrices_SeqBAIJ,
2066        MatIncreaseOverlap_SeqBAIJ,
2067        MatGetValues_SeqBAIJ,
2068        MatCopy_SeqBAIJ,
2069 /*45*/ 0,
2070        MatScale_SeqBAIJ,
2071        0,
2072        0,
2073        0,
2074 /*50*/ 0,
2075        MatGetRowIJ_SeqBAIJ,
2076        MatRestoreRowIJ_SeqBAIJ,
2077        0,
2078        0,
2079 /*55*/ 0,
2080        0,
2081        0,
2082        0,
2083        MatSetValuesBlocked_SeqBAIJ,
2084 /*60*/ MatGetSubMatrix_SeqBAIJ,
2085        MatDestroy_SeqBAIJ,
2086        MatView_SeqBAIJ,
2087        0,
2088        0,
2089 /*65*/ 0,
2090        0,
2091        0,
2092        0,
2093        0,
2094 /*70*/ MatGetRowMax_SeqBAIJ,
2095        MatConvert_Basic,
2096        0,
2097        0,
2098        0,
2099 /*75*/ 0,
2100        0,
2101        0,
2102        0,
2103        0,
2104 /*80*/ 0,
2105        0,
2106        0,
2107        0,
2108        MatLoad_SeqBAIJ,
2109 /*85*/ 0,
2110        0,
2111        0,
2112        0,
2113        0,
2114 /*90*/ 0,
2115        0,
2116        0,
2117        0,
2118        0,
2119 /*95*/ 0,
2120        0,
2121        0,
2122        0,
2123        0,
2124 /*100*/0,
2125        0,
2126        0,
2127        0,
2128        0,
2129 /*105*/0,
2130        MatRealPart_SeqBAIJ,
2131        MatImaginaryPart_SeqBAIJ
2132 };
2133 
2134 EXTERN_C_BEGIN
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2137 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2138 {
2139   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2140   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2141   PetscErrorCode ierr;
2142 
2143   PetscFunctionBegin;
2144   if (aij->nonew != 1) {
2145     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2146   }
2147 
2148   /* allocate space for values if not already there */
2149   if (!aij->saved_values) {
2150     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2151   }
2152 
2153   /* copy values over */
2154   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2155   PetscFunctionReturn(0);
2156 }
2157 EXTERN_C_END
2158 
2159 EXTERN_C_BEGIN
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2162 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2163 {
2164   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2165   PetscErrorCode ierr;
2166   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2167 
2168   PetscFunctionBegin;
2169   if (aij->nonew != 1) {
2170     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2171   }
2172   if (!aij->saved_values) {
2173     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2174   }
2175 
2176   /* copy values over */
2177   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2178   PetscFunctionReturn(0);
2179 }
2180 EXTERN_C_END
2181 
2182 EXTERN_C_BEGIN
2183 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2185 EXTERN_C_END
2186 
2187 EXTERN_C_BEGIN
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2190 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2191 {
2192   Mat_SeqBAIJ    *b;
2193   PetscErrorCode ierr;
2194   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2195   PetscTruth     flg,skipallocation = PETSC_FALSE;
2196 
2197   PetscFunctionBegin;
2198 
2199   if (nz == MAT_SKIP_ALLOCATION) {
2200     skipallocation = PETSC_TRUE;
2201     nz             = 0;
2202   }
2203 
2204   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2205     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2206   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2207 
2208   if (nnz && newbs != bs) {
2209     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2210   }
2211   bs   = newbs;
2212 
2213   B->rmap.bs = B->cmap.bs = bs;
2214   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2215   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2216 
2217   B->preallocated = PETSC_TRUE;
2218 
2219   mbs  = B->rmap.n/bs;
2220   nbs  = B->cmap.n/bs;
2221   bs2  = bs*bs;
2222 
2223   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2224     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2225   }
2226 
2227   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2228   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2229   if (nnz) {
2230     for (i=0; i<mbs; i++) {
2231       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2232       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2233     }
2234   }
2235 
2236   b       = (Mat_SeqBAIJ*)B->data;
2237   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2238     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2239   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2240 
2241   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2242   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2243   if (!flg) {
2244     switch (bs) {
2245     case 1:
2246       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2247       B->ops->mult            = MatMult_SeqBAIJ_1;
2248       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2249       break;
2250     case 2:
2251       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2252       B->ops->mult            = MatMult_SeqBAIJ_2;
2253       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2254       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2255       break;
2256     case 3:
2257       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2258       B->ops->mult            = MatMult_SeqBAIJ_3;
2259       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2260       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2261       break;
2262     case 4:
2263       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2264       B->ops->mult            = MatMult_SeqBAIJ_4;
2265       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2266       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2267       break;
2268     case 5:
2269       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2270       B->ops->mult            = MatMult_SeqBAIJ_5;
2271       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2272       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2273       break;
2274     case 6:
2275       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2276       B->ops->mult            = MatMult_SeqBAIJ_6;
2277       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2278       break;
2279     case 7:
2280       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2281       B->ops->mult            = MatMult_SeqBAIJ_7;
2282       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2283       break;
2284     default:
2285       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2286       B->ops->mult            = MatMult_SeqBAIJ_N;
2287       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2288       break;
2289     }
2290   }
2291   B->rmap.bs      = bs;
2292   b->mbs     = mbs;
2293   b->nbs     = nbs;
2294   if (!skipallocation) {
2295     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2296     /* b->ilen will count nonzeros in each block row so far. */
2297     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2298     if (!nnz) {
2299       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2300       else if (nz <= 0)        nz = 1;
2301       for (i=0; i<mbs; i++) b->imax[i] = nz;
2302       nz = nz*mbs;
2303     } else {
2304       nz = 0;
2305       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2306     }
2307 
2308     /* allocate the matrix space */
2309     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2310     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2311     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2312     b->singlemalloc = PETSC_TRUE;
2313 
2314     b->i[0] = 0;
2315     for (i=1; i<mbs+1; i++) {
2316       b->i[i] = b->i[i-1] + b->imax[i-1];
2317     }
2318     b->free_a     = PETSC_TRUE;
2319     b->free_ij    = PETSC_TRUE;
2320   } else {
2321     b->free_a     = PETSC_FALSE;
2322     b->free_ij    = PETSC_FALSE;
2323   }
2324 
2325   B->rmap.bs          = bs;
2326   b->bs2              = bs2;
2327   b->mbs              = mbs;
2328   b->nz               = 0;
2329   b->maxnz            = nz*bs2;
2330   B->info.nz_unneeded = (PetscReal)b->maxnz;
2331   PetscFunctionReturn(0);
2332 }
2333 EXTERN_C_END
2334 
2335 /*MC
2336    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2337    block sparse compressed row format.
2338 
2339    Options Database Keys:
2340 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2341 
2342   Level: beginner
2343 
2344 .seealso: MatCreateSeqBAIJ()
2345 M*/
2346 
2347 EXTERN_C_BEGIN
2348 #undef __FUNCT__
2349 #define __FUNCT__ "MatCreate_SeqBAIJ"
2350 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2351 {
2352   PetscErrorCode ierr;
2353   PetscMPIInt    size;
2354   Mat_SeqBAIJ    *b;
2355 
2356   PetscFunctionBegin;
2357   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2358   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2359 
2360   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2361   B->data = (void*)b;
2362   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2363   B->factor           = 0;
2364   B->mapping          = 0;
2365   b->row              = 0;
2366   b->col              = 0;
2367   b->icol             = 0;
2368   b->reallocs         = 0;
2369   b->saved_values     = 0;
2370 #if defined(PETSC_USE_MAT_SINGLE)
2371   b->setvalueslen     = 0;
2372   b->setvaluescopy    = PETSC_NULL;
2373 #endif
2374 
2375   b->sorted           = PETSC_FALSE;
2376   b->roworiented      = PETSC_TRUE;
2377   b->nonew            = 0;
2378   b->diag             = 0;
2379   b->solve_work       = 0;
2380   b->mult_work        = 0;
2381   B->spptr            = 0;
2382   B->info.nz_unneeded = (PetscReal)b->maxnz;
2383   b->keepzeroedrows   = PETSC_FALSE;
2384   b->xtoy              = 0;
2385   b->XtoY              = 0;
2386   b->compressedrow.use     = PETSC_FALSE;
2387   b->compressedrow.nrows   = 0;
2388   b->compressedrow.i       = PETSC_NULL;
2389   b->compressedrow.rindex  = PETSC_NULL;
2390   b->compressedrow.checked = PETSC_FALSE;
2391   B->same_nonzero          = PETSC_FALSE;
2392 
2393   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2394                                      "MatInvertBlockDiagonal_SeqBAIJ",
2395                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2397                                      "MatStoreValues_SeqBAIJ",
2398                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2400                                      "MatRetrieveValues_SeqBAIJ",
2401                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2403                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2404                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2406                                      "MatConvert_SeqBAIJ_SeqAIJ",
2407                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2409                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2410                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2412                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2413                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 EXTERN_C_END
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2420 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2421 {
2422   Mat            C;
2423   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2424   PetscErrorCode ierr;
2425   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2426 
2427   PetscFunctionBegin;
2428   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2429 
2430   *B = 0;
2431   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2432   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2433   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2434   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2435   c    = (Mat_SeqBAIJ*)C->data;
2436 
2437   C->rmap.N   = A->rmap.N;
2438   C->cmap.N   = A->cmap.N;
2439   C->rmap.bs  = A->rmap.bs;
2440   c->bs2 = a->bs2;
2441   c->mbs = a->mbs;
2442   c->nbs = a->nbs;
2443 
2444   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2445   for (i=0; i<mbs; i++) {
2446     c->imax[i] = a->imax[i];
2447     c->ilen[i] = a->ilen[i];
2448   }
2449 
2450   /* allocate the matrix space */
2451   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2452   c->singlemalloc = PETSC_TRUE;
2453   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2454   if (mbs > 0) {
2455     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2456     if (cpvalues == MAT_COPY_VALUES) {
2457       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2458     } else {
2459       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2460     }
2461   }
2462   c->sorted      = a->sorted;
2463   c->roworiented = a->roworiented;
2464   c->nonew       = a->nonew;
2465 
2466   if (a->diag) {
2467     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2468     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2469     for (i=0; i<mbs; i++) {
2470       c->diag[i] = a->diag[i];
2471     }
2472   } else c->diag        = 0;
2473   c->nz                 = a->nz;
2474   c->maxnz              = a->maxnz;
2475   c->solve_work         = 0;
2476   c->mult_work          = 0;
2477   c->free_a             = PETSC_TRUE;
2478   c->free_ij            = PETSC_TRUE;
2479   C->preallocated       = PETSC_TRUE;
2480   C->assembled          = PETSC_TRUE;
2481 
2482   c->compressedrow.use     = a->compressedrow.use;
2483   c->compressedrow.nrows   = a->compressedrow.nrows;
2484   c->compressedrow.checked = a->compressedrow.checked;
2485   if ( a->compressedrow.checked && a->compressedrow.use){
2486     i = a->compressedrow.nrows;
2487     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2488     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2489     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2490     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2491   } else {
2492     c->compressedrow.use    = PETSC_FALSE;
2493     c->compressedrow.i      = PETSC_NULL;
2494     c->compressedrow.rindex = PETSC_NULL;
2495   }
2496   C->same_nonzero = A->same_nonzero;
2497   *B = C;
2498   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatLoad_SeqBAIJ"
2504 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2505 {
2506   Mat_SeqBAIJ    *a;
2507   Mat            B;
2508   PetscErrorCode ierr;
2509   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2510   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2511   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2512   PetscInt       *masked,nmask,tmp,bs2,ishift;
2513   PetscMPIInt    size;
2514   int            fd;
2515   PetscScalar    *aa;
2516   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2517 
2518   PetscFunctionBegin;
2519   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2520     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2521   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2522   bs2  = bs*bs;
2523 
2524   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2525   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2526   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2527   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2528   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2529   M = header[1]; N = header[2]; nz = header[3];
2530 
2531   if (header[3] < 0) {
2532     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2533   }
2534 
2535   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2536 
2537   /*
2538      This code adds extra rows to make sure the number of rows is
2539     divisible by the blocksize
2540   */
2541   mbs        = M/bs;
2542   extra_rows = bs - M + bs*(mbs);
2543   if (extra_rows == bs) extra_rows = 0;
2544   else                  mbs++;
2545   if (extra_rows) {
2546     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2547   }
2548 
2549   /* read in row lengths */
2550   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2551   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2552   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2553 
2554   /* read in column indices */
2555   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2556   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2557   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2558 
2559   /* loop over row lengths determining block row lengths */
2560   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2561   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2562   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2563   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2564   masked   = mask + mbs;
2565   rowcount = 0; nzcount = 0;
2566   for (i=0; i<mbs; i++) {
2567     nmask = 0;
2568     for (j=0; j<bs; j++) {
2569       kmax = rowlengths[rowcount];
2570       for (k=0; k<kmax; k++) {
2571         tmp = jj[nzcount++]/bs;
2572         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2573       }
2574       rowcount++;
2575     }
2576     browlengths[i] += nmask;
2577     /* zero out the mask elements we set */
2578     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2579   }
2580 
2581   /* create our matrix */
2582   ierr = MatCreate(comm,&B);
2583   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2584   ierr = MatSetType(B,type);CHKERRQ(ierr);
2585   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2586   a = (Mat_SeqBAIJ*)B->data;
2587 
2588   /* set matrix "i" values */
2589   a->i[0] = 0;
2590   for (i=1; i<= mbs; i++) {
2591     a->i[i]      = a->i[i-1] + browlengths[i-1];
2592     a->ilen[i-1] = browlengths[i-1];
2593   }
2594   a->nz         = 0;
2595   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2596 
2597   /* read in nonzero values */
2598   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2599   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2600   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2601 
2602   /* set "a" and "j" values into matrix */
2603   nzcount = 0; jcount = 0;
2604   for (i=0; i<mbs; i++) {
2605     nzcountb = nzcount;
2606     nmask    = 0;
2607     for (j=0; j<bs; j++) {
2608       kmax = rowlengths[i*bs+j];
2609       for (k=0; k<kmax; k++) {
2610         tmp = jj[nzcount++]/bs;
2611 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2612       }
2613     }
2614     /* sort the masked values */
2615     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2616 
2617     /* set "j" values into matrix */
2618     maskcount = 1;
2619     for (j=0; j<nmask; j++) {
2620       a->j[jcount++]  = masked[j];
2621       mask[masked[j]] = maskcount++;
2622     }
2623     /* set "a" values into matrix */
2624     ishift = bs2*a->i[i];
2625     for (j=0; j<bs; j++) {
2626       kmax = rowlengths[i*bs+j];
2627       for (k=0; k<kmax; k++) {
2628         tmp       = jj[nzcountb]/bs ;
2629         block     = mask[tmp] - 1;
2630         point     = jj[nzcountb] - bs*tmp;
2631         idx       = ishift + bs2*block + j + bs*point;
2632         a->a[idx] = (MatScalar)aa[nzcountb++];
2633       }
2634     }
2635     /* zero out the mask elements we set */
2636     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2637   }
2638   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2639 
2640   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2641   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2642   ierr = PetscFree(aa);CHKERRQ(ierr);
2643   ierr = PetscFree(jj);CHKERRQ(ierr);
2644   ierr = PetscFree(mask);CHKERRQ(ierr);
2645 
2646   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2647   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2648   ierr = MatView_Private(B);CHKERRQ(ierr);
2649 
2650   *A = B;
2651   PetscFunctionReturn(0);
2652 }
2653 
2654 #undef __FUNCT__
2655 #define __FUNCT__ "MatCreateSeqBAIJ"
2656 /*@C
2657    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2658    compressed row) format.  For good matrix assembly performance the
2659    user should preallocate the matrix storage by setting the parameter nz
2660    (or the array nnz).  By setting these parameters accurately, performance
2661    during matrix assembly can be increased by more than a factor of 50.
2662 
2663    Collective on MPI_Comm
2664 
2665    Input Parameters:
2666 +  comm - MPI communicator, set to PETSC_COMM_SELF
2667 .  bs - size of block
2668 .  m - number of rows
2669 .  n - number of columns
2670 .  nz - number of nonzero blocks  per block row (same for all rows)
2671 -  nnz - array containing the number of nonzero blocks in the various block rows
2672          (possibly different for each block row) or PETSC_NULL
2673 
2674    Output Parameter:
2675 .  A - the matrix
2676 
2677    Options Database Keys:
2678 .   -mat_no_unroll - uses code that does not unroll the loops in the
2679                      block calculations (much slower)
2680 .    -mat_block_size - size of the blocks to use
2681 
2682    Level: intermediate
2683 
2684    Notes:
2685    The number of rows and columns must be divisible by blocksize.
2686 
2687    If the nnz parameter is given then the nz parameter is ignored
2688 
2689    A nonzero block is any block that as 1 or more nonzeros in it
2690 
2691    The block AIJ format is fully compatible with standard Fortran 77
2692    storage.  That is, the stored row and column indices can begin at
2693    either one (as in Fortran) or zero.  See the users' manual for details.
2694 
2695    Specify the preallocated storage with either nz or nnz (not both).
2696    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2697    allocation.  For additional details, see the users manual chapter on
2698    matrices.
2699 
2700 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2701 @*/
2702 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2703 {
2704   PetscErrorCode ierr;
2705 
2706   PetscFunctionBegin;
2707   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2708   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2709   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2710   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2716 /*@C
2717    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2718    per row in the matrix. For good matrix assembly performance the
2719    user should preallocate the matrix storage by setting the parameter nz
2720    (or the array nnz).  By setting these parameters accurately, performance
2721    during matrix assembly can be increased by more than a factor of 50.
2722 
2723    Collective on MPI_Comm
2724 
2725    Input Parameters:
2726 +  A - the matrix
2727 .  bs - size of block
2728 .  nz - number of block nonzeros per block row (same for all rows)
2729 -  nnz - array containing the number of block nonzeros in the various block rows
2730          (possibly different for each block row) or PETSC_NULL
2731 
2732    Options Database Keys:
2733 .   -mat_no_unroll - uses code that does not unroll the loops in the
2734                      block calculations (much slower)
2735 .    -mat_block_size - size of the blocks to use
2736 
2737    Level: intermediate
2738 
2739    Notes:
2740    If the nnz parameter is given then the nz parameter is ignored
2741 
2742    The block AIJ format is fully compatible with standard Fortran 77
2743    storage.  That is, the stored row and column indices can begin at
2744    either one (as in Fortran) or zero.  See the users' manual for details.
2745 
2746    Specify the preallocated storage with either nz or nnz (not both).
2747    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2748    allocation.  For additional details, see the users manual chapter on
2749    matrices.
2750 
2751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2752 @*/
2753 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2754 {
2755   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2756 
2757   PetscFunctionBegin;
2758   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2759   if (f) {
2760     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2761   }
2762   PetscFunctionReturn(0);
2763 }
2764 
2765 #undef __FUNCT__
2766 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2767 /*@
2768      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2769               (upper triangular entries in CSR format) provided by the user.
2770 
2771      Collective on MPI_Comm
2772 
2773    Input Parameters:
2774 +  comm - must be an MPI communicator of size 1
2775 .  bs - size of block
2776 .  m - number of rows
2777 .  n - number of columns
2778 .  i - row indices
2779 .  j - column indices
2780 -  a - matrix values
2781 
2782    Output Parameter:
2783 .  mat - the matrix
2784 
2785    Level: intermediate
2786 
2787    Notes:
2788        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2789     once the matrix is destroyed
2790 
2791        You cannot set new nonzero locations into this matrix, that will generate an error.
2792 
2793        The i and j indices are 0 based
2794 
2795 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2796 
2797 @*/
2798 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2799 {
2800   PetscErrorCode ierr;
2801   PetscInt       ii;
2802   Mat_SeqBAIJ    *baij;
2803 
2804   PetscFunctionBegin;
2805   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2806   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2807 
2808   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2809   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2810   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2811   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2812   baij = (Mat_SeqBAIJ*)(*mat)->data;
2813   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2814 
2815   baij->i = i;
2816   baij->j = j;
2817   baij->a = a;
2818   baij->singlemalloc = PETSC_FALSE;
2819   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2820   baij->free_a       = PETSC_FALSE;
2821   baij->free_ij       = PETSC_FALSE;
2822 
2823   for (ii=0; ii<m; ii++) {
2824     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2825 #if defined(PETSC_USE_DEBUG)
2826     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2827 #endif
2828   }
2829 #if defined(PETSC_USE_DEBUG)
2830   for (ii=0; ii<baij->i[m]; ii++) {
2831     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2832     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2833   }
2834 #endif
2835 
2836   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840