xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 22612f2f7cceb60caedd65384cdf99fc989f2aeb)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt     i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs;
785   PetscInt     *tia, *tja;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
792   } else {
793     tia = a->i; tja = a->j;
794   }
795 
796   if (!blockcompressed) {
797     /* malloc & create the natural set of indices */
798     ierr = PetscMalloc2((n+1)*bs,PetscInt,ia,nz*bs,PetscInt,ja);CHKERRQ(ierr);
799     for (i=0; i<n+1; i++) {
800       for (j=0; j<bs; j++) {
801         *ia[i*bs+j] = tia[i]*bs+j;
802       }
803     }
804     for (i=0; i<nz; i++) {
805       for (j=0; j<bs; j++) {
806         *ja[i*bs+j] = tia[i]*bs+j;
807       }
808     }
809     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
810       ierr = PetscFree(tia);CHKERRQ(ierr);
811       ierr = PetscFree(tja);CHKERRQ(ierr);
812     }
813   } else {
814     *ia = tia;
815     *ja = tja;
816   }
817   if (oshift == 1) {
818     for (i=0; i<nz; i++) (*ja)[i]++;
819     for (i=0; i<n+1; i++) (*ia)[i]++;
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
826 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
827 {
828   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
829   PetscErrorCode ierr;
830   PetscInt       i,n = a->mbs,nz = a->i[n];
831 
832   PetscFunctionBegin;
833   if (!ia) PetscFunctionReturn(0);
834   if (!blockcompressed) {
835     ierr = PetscFree2(*ia,*ja);CHKERRQ(ierr);
836   }else if (symmetric) {
837     ierr = PetscFree(*ia);CHKERRQ(ierr);
838     ierr = PetscFree(*ja);CHKERRQ(ierr);
839   } else if (oshift == 1) { /* blockcompressed */
840     for (i=0; i<nz; i++) a->j[i]--;
841     for (i=0; i<n+1; i++) a->i[i]--;
842   }
843   PetscFunctionReturn(0);
844 }
845 
846 #undef __FUNCT__
847 #define __FUNCT__ "MatDestroy_SeqBAIJ"
848 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
849 {
850   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
851   PetscErrorCode ierr;
852 
853   PetscFunctionBegin;
854 #if defined(PETSC_USE_LOG)
855   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
856 #endif
857   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
858   if (a->row) {
859     ierr = ISDestroy(a->row);CHKERRQ(ierr);
860   }
861   if (a->col) {
862     ierr = ISDestroy(a->col);CHKERRQ(ierr);
863   }
864   ierr = PetscFree(a->diag);CHKERRQ(ierr);
865   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
866   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
867   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
868   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
869   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
870   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
871 #if defined(PETSC_USE_MAT_SINGLE)
872   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
873 #endif
874   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
875   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
876 
877   ierr = PetscFree(a);CHKERRQ(ierr);
878 
879   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
880   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
881   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
882   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
883   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
884   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
885   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
886   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
890 #undef __FUNCT__
891 #define __FUNCT__ "MatSetOption_SeqBAIJ"
892 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
893 {
894   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
895   PetscErrorCode ierr;
896 
897   PetscFunctionBegin;
898   switch (op) {
899   case MAT_ROW_ORIENTED:
900     a->roworiented    = flg;
901     break;
902   case MAT_KEEP_ZEROED_ROWS:
903     a->keepzeroedrows = flg;
904     break;
905   case MAT_NEW_NONZERO_LOCATIONS:
906     a->nonew          = (flg ? 0 : 1);
907     break;
908   case MAT_NEW_NONZERO_LOCATION_ERR:
909     a->nonew          = (flg ? -1 : 0);
910     break;
911   case MAT_NEW_NONZERO_ALLOCATION_ERR:
912     a->nonew          = (flg ? -2 : 0);
913     break;
914   case MAT_NEW_DIAGONALS:
915   case MAT_IGNORE_OFF_PROC_ENTRIES:
916   case MAT_USE_HASH_TABLE:
917     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
918     break;
919   case MAT_SYMMETRIC:
920   case MAT_STRUCTURALLY_SYMMETRIC:
921   case MAT_HERMITIAN:
922   case MAT_SYMMETRY_ETERNAL:
923     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
924     break;
925   default:
926     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
927   }
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatGetRow_SeqBAIJ"
933 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
934 {
935   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
936   PetscErrorCode ierr;
937   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
938   MatScalar      *aa,*aa_i;
939   PetscScalar    *v_i;
940 
941   PetscFunctionBegin;
942   bs  = A->rmap.bs;
943   ai  = a->i;
944   aj  = a->j;
945   aa  = a->a;
946   bs2 = a->bs2;
947 
948   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
949 
950   bn  = row/bs;   /* Block number */
951   bp  = row % bs; /* Block Position */
952   M   = ai[bn+1] - ai[bn];
953   *nz = bs*M;
954 
955   if (v) {
956     *v = 0;
957     if (*nz) {
958       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
959       for (i=0; i<M; i++) { /* for each block in the block row */
960         v_i  = *v + i*bs;
961         aa_i = aa + bs2*(ai[bn] + i);
962         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
963       }
964     }
965   }
966 
967   if (idx) {
968     *idx = 0;
969     if (*nz) {
970       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
971       for (i=0; i<M; i++) { /* for each block in the block row */
972         idx_i = *idx + i*bs;
973         itmp  = bs*aj[ai[bn] + i];
974         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
975       }
976     }
977   }
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
983 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
984 {
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
989   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
990   PetscFunctionReturn(0);
991 }
992 
993 #undef __FUNCT__
994 #define __FUNCT__ "MatTranspose_SeqBAIJ"
995 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
996 {
997   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
998   Mat            C;
999   PetscErrorCode ierr;
1000   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1001   PetscInt       *rows,*cols,bs2=a->bs2;
1002   PetscScalar    *array;
1003 
1004   PetscFunctionBegin;
1005   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1006   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1007   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1008 
1009 #if defined(PETSC_USE_MAT_SINGLE)
1010   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1011   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1012 #else
1013   array = a->a;
1014 #endif
1015 
1016   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1017   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1018   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1019   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1020   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1021   ierr = PetscFree(col);CHKERRQ(ierr);
1022   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1023   cols = rows + bs;
1024   for (i=0; i<mbs; i++) {
1025     cols[0] = i*bs;
1026     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1027     len = ai[i+1] - ai[i];
1028     for (j=0; j<len; j++) {
1029       rows[0] = (*aj++)*bs;
1030       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1031       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1032       array += bs2;
1033     }
1034   }
1035   ierr = PetscFree(rows);CHKERRQ(ierr);
1036 #if defined(PETSC_USE_MAT_SINGLE)
1037   ierr = PetscFree(array);
1038 #endif
1039 
1040   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1042 
1043   if (B) {
1044     *B = C;
1045   } else {
1046     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1047   }
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1053 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1054 {
1055   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1056   PetscErrorCode ierr;
1057   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1058   int            fd;
1059   PetscScalar    *aa;
1060   FILE           *file;
1061 
1062   PetscFunctionBegin;
1063   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1064   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1065   col_lens[0] = MAT_FILE_COOKIE;
1066 
1067   col_lens[1] = A->rmap.N;
1068   col_lens[2] = A->cmap.n;
1069   col_lens[3] = a->nz*bs2;
1070 
1071   /* store lengths of each row and write (including header) to file */
1072   count = 0;
1073   for (i=0; i<a->mbs; i++) {
1074     for (j=0; j<bs; j++) {
1075       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1076     }
1077   }
1078   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1079   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1080 
1081   /* store column indices (zero start index) */
1082   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1083   count = 0;
1084   for (i=0; i<a->mbs; i++) {
1085     for (j=0; j<bs; j++) {
1086       for (k=a->i[i]; k<a->i[i+1]; k++) {
1087         for (l=0; l<bs; l++) {
1088           jj[count++] = bs*a->j[k] + l;
1089         }
1090       }
1091     }
1092   }
1093   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1094   ierr = PetscFree(jj);CHKERRQ(ierr);
1095 
1096   /* store nonzero values */
1097   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1098   count = 0;
1099   for (i=0; i<a->mbs; i++) {
1100     for (j=0; j<bs; j++) {
1101       for (k=a->i[i]; k<a->i[i+1]; k++) {
1102         for (l=0; l<bs; l++) {
1103           aa[count++] = a->a[bs2*k + l*bs + j];
1104         }
1105       }
1106     }
1107   }
1108   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1109   ierr = PetscFree(aa);CHKERRQ(ierr);
1110 
1111   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1112   if (file) {
1113     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1114   }
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1120 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1121 {
1122   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1123   PetscErrorCode    ierr;
1124   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1125   PetscViewerFormat format;
1126 
1127   PetscFunctionBegin;
1128   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1129   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1130     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1131   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1132     Mat aij;
1133     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1134     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1135     ierr = MatDestroy(aij);CHKERRQ(ierr);
1136   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1137      PetscFunctionReturn(0);
1138   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1139     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1140     for (i=0; i<a->mbs; i++) {
1141       for (j=0; j<bs; j++) {
1142         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1143         for (k=a->i[i]; k<a->i[i+1]; k++) {
1144           for (l=0; l<bs; l++) {
1145 #if defined(PETSC_USE_COMPLEX)
1146             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1148                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1151                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1152             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1153               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1154             }
1155 #else
1156             if (a->a[bs2*k + l*bs + j] != 0.0) {
1157               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1158             }
1159 #endif
1160           }
1161         }
1162         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1163       }
1164     }
1165     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1166   } else {
1167     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1168     for (i=0; i<a->mbs; i++) {
1169       for (j=0; j<bs; j++) {
1170         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1171         for (k=a->i[i]; k<a->i[i+1]; k++) {
1172           for (l=0; l<bs; l++) {
1173 #if defined(PETSC_USE_COMPLEX)
1174             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1175               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1176                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1179                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1180             } else {
1181               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1182             }
1183 #else
1184             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1185 #endif
1186           }
1187         }
1188         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1189       }
1190     }
1191     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1192   }
1193   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1194   PetscFunctionReturn(0);
1195 }
1196 
1197 #undef __FUNCT__
1198 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1199 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1200 {
1201   Mat            A = (Mat) Aa;
1202   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1203   PetscErrorCode ierr;
1204   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1205   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1206   MatScalar      *aa;
1207   PetscViewer    viewer;
1208 
1209   PetscFunctionBegin;
1210 
1211   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1212   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1213 
1214   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1215 
1216   /* loop over matrix elements drawing boxes */
1217   color = PETSC_DRAW_BLUE;
1218   for (i=0,row=0; i<mbs; i++,row+=bs) {
1219     for (j=a->i[i]; j<a->i[i+1]; j++) {
1220       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1221       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1222       aa = a->a + j*bs2;
1223       for (k=0; k<bs; k++) {
1224         for (l=0; l<bs; l++) {
1225           if (PetscRealPart(*aa++) >=  0.) continue;
1226           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1227         }
1228       }
1229     }
1230   }
1231   color = PETSC_DRAW_CYAN;
1232   for (i=0,row=0; i<mbs; i++,row+=bs) {
1233     for (j=a->i[i]; j<a->i[i+1]; j++) {
1234       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1235       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1236       aa = a->a + j*bs2;
1237       for (k=0; k<bs; k++) {
1238         for (l=0; l<bs; l++) {
1239           if (PetscRealPart(*aa++) != 0.) continue;
1240           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1241         }
1242       }
1243     }
1244   }
1245 
1246   color = PETSC_DRAW_RED;
1247   for (i=0,row=0; i<mbs; i++,row+=bs) {
1248     for (j=a->i[i]; j<a->i[i+1]; j++) {
1249       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1250       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1251       aa = a->a + j*bs2;
1252       for (k=0; k<bs; k++) {
1253         for (l=0; l<bs; l++) {
1254           if (PetscRealPart(*aa++) <= 0.) continue;
1255           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1256         }
1257       }
1258     }
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1265 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1266 {
1267   PetscErrorCode ierr;
1268   PetscReal      xl,yl,xr,yr,w,h;
1269   PetscDraw      draw;
1270   PetscTruth     isnull;
1271 
1272   PetscFunctionBegin;
1273 
1274   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1275   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1276 
1277   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1278   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1279   xr += w;    yr += h;  xl = -w;     yl = -h;
1280   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1281   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1282   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatView_SeqBAIJ"
1288 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1289 {
1290   PetscErrorCode ierr;
1291   PetscTruth     iascii,isbinary,isdraw;
1292 
1293   PetscFunctionBegin;
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1295   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1296   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1297   if (iascii){
1298     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1299   } else if (isbinary) {
1300     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1301   } else if (isdraw) {
1302     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1303   } else {
1304     Mat B;
1305     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1306     ierr = MatView(B,viewer);CHKERRQ(ierr);
1307     ierr = MatDestroy(B);CHKERRQ(ierr);
1308   }
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1315 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1316 {
1317   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1318   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1319   PetscInt    *ai = a->i,*ailen = a->ilen;
1320   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1321   MatScalar   *ap,*aa = a->a;
1322 
1323   PetscFunctionBegin;
1324   for (k=0; k<m; k++) { /* loop over rows */
1325     row  = im[k]; brow = row/bs;
1326     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1327     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1328     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1329     nrow = ailen[brow];
1330     for (l=0; l<n; l++) { /* loop over columns */
1331       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1332       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1333       col  = in[l] ;
1334       bcol = col/bs;
1335       cidx = col%bs;
1336       ridx = row%bs;
1337       high = nrow;
1338       low  = 0; /* assume unsorted */
1339       while (high-low > 5) {
1340         t = (low+high)/2;
1341         if (rp[t] > bcol) high = t;
1342         else             low  = t;
1343       }
1344       for (i=low; i<high; i++) {
1345         if (rp[i] > bcol) break;
1346         if (rp[i] == bcol) {
1347           *v++ = ap[bs2*i+bs*cidx+ridx];
1348           goto finished;
1349         }
1350       }
1351       *v++ = 0.0;
1352       finished:;
1353     }
1354   }
1355   PetscFunctionReturn(0);
1356 }
1357 
1358 #if defined(PETSC_USE_MAT_SINGLE)
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1361 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1362 {
1363   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1364   PetscErrorCode ierr;
1365   PetscInt       i,N = m*n*b->bs2;
1366   MatScalar      *vsingle;
1367 
1368   PetscFunctionBegin;
1369   if (N > b->setvalueslen) {
1370     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1371     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1372     b->setvalueslen  = N;
1373   }
1374   vsingle = b->setvaluescopy;
1375   for (i=0; i<N; i++) {
1376     vsingle[i] = v[i];
1377   }
1378   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 #endif
1382 
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1386 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1387 {
1388   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1389   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1390   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1391   PetscErrorCode  ierr;
1392   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1393   PetscTruth      roworiented=a->roworiented;
1394   const MatScalar *value = v;
1395   MatScalar       *ap,*aa = a->a,*bap;
1396 
1397   PetscFunctionBegin;
1398   if (roworiented) {
1399     stepval = (n-1)*bs;
1400   } else {
1401     stepval = (m-1)*bs;
1402   }
1403   for (k=0; k<m; k++) { /* loop over added rows */
1404     row  = im[k];
1405     if (row < 0) continue;
1406 #if defined(PETSC_USE_DEBUG)
1407     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1408 #endif
1409     rp   = aj + ai[row];
1410     ap   = aa + bs2*ai[row];
1411     rmax = imax[row];
1412     nrow = ailen[row];
1413     low  = 0;
1414     high = nrow;
1415     for (l=0; l<n; l++) { /* loop over added columns */
1416       if (in[l] < 0) continue;
1417 #if defined(PETSC_USE_DEBUG)
1418       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1419 #endif
1420       col = in[l];
1421       if (roworiented) {
1422         value = v + k*(stepval+bs)*bs + l*bs;
1423       } else {
1424         value = v + l*(stepval+bs)*bs + k*bs;
1425       }
1426       if (col <= lastcol) low = 0; else high = nrow;
1427       lastcol = col;
1428       while (high-low > 7) {
1429         t = (low+high)/2;
1430         if (rp[t] > col) high = t;
1431         else             low  = t;
1432       }
1433       for (i=low; i<high; i++) {
1434         if (rp[i] > col) break;
1435         if (rp[i] == col) {
1436           bap  = ap +  bs2*i;
1437           if (roworiented) {
1438             if (is == ADD_VALUES) {
1439               for (ii=0; ii<bs; ii++,value+=stepval) {
1440                 for (jj=ii; jj<bs2; jj+=bs) {
1441                   bap[jj] += *value++;
1442                 }
1443               }
1444             } else {
1445               for (ii=0; ii<bs; ii++,value+=stepval) {
1446                 for (jj=ii; jj<bs2; jj+=bs) {
1447                   bap[jj] = *value++;
1448                 }
1449               }
1450             }
1451           } else {
1452             if (is == ADD_VALUES) {
1453               for (ii=0; ii<bs; ii++,value+=stepval) {
1454                 for (jj=0; jj<bs; jj++) {
1455                   *bap++ += *value++;
1456                 }
1457               }
1458             } else {
1459               for (ii=0; ii<bs; ii++,value+=stepval) {
1460                 for (jj=0; jj<bs; jj++) {
1461                   *bap++  = *value++;
1462                 }
1463               }
1464             }
1465           }
1466           goto noinsert2;
1467         }
1468       }
1469       if (nonew == 1) goto noinsert2;
1470       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1471       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1472       N = nrow++ - 1; high++;
1473       /* shift up all the later entries in this row */
1474       for (ii=N; ii>=i; ii--) {
1475         rp[ii+1] = rp[ii];
1476         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1477       }
1478       if (N >= i) {
1479         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1480       }
1481       rp[i] = col;
1482       bap   = ap +  bs2*i;
1483       if (roworiented) {
1484         for (ii=0; ii<bs; ii++,value+=stepval) {
1485           for (jj=ii; jj<bs2; jj+=bs) {
1486             bap[jj] = *value++;
1487           }
1488         }
1489       } else {
1490         for (ii=0; ii<bs; ii++,value+=stepval) {
1491           for (jj=0; jj<bs; jj++) {
1492             *bap++  = *value++;
1493           }
1494         }
1495       }
1496       noinsert2:;
1497       low = i;
1498     }
1499     ailen[row] = nrow;
1500   }
1501   PetscFunctionReturn(0);
1502 }
1503 
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1506 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1507 {
1508   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1509   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1510   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1511   PetscErrorCode ierr;
1512   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1513   MatScalar      *aa = a->a,*ap;
1514   PetscReal      ratio=0.6;
1515 
1516   PetscFunctionBegin;
1517   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1518 
1519   if (m) rmax = ailen[0];
1520   for (i=1; i<mbs; i++) {
1521     /* move each row back by the amount of empty slots (fshift) before it*/
1522     fshift += imax[i-1] - ailen[i-1];
1523     rmax   = PetscMax(rmax,ailen[i]);
1524     if (fshift) {
1525       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1526       N = ailen[i];
1527       for (j=0; j<N; j++) {
1528         ip[j-fshift] = ip[j];
1529         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1530       }
1531     }
1532     ai[i] = ai[i-1] + ailen[i-1];
1533   }
1534   if (mbs) {
1535     fshift += imax[mbs-1] - ailen[mbs-1];
1536     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1537   }
1538   /* reset ilen and imax for each row */
1539   for (i=0; i<mbs; i++) {
1540     ailen[i] = imax[i] = ai[i+1] - ai[i];
1541   }
1542   a->nz = ai[mbs];
1543 
1544   /* diagonals may have moved, so kill the diagonal pointers */
1545   a->idiagvalid = PETSC_FALSE;
1546   if (fshift && a->diag) {
1547     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1548     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1549     a->diag = 0;
1550   }
1551   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1552   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1553   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1554   a->reallocs          = 0;
1555   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1556 
1557   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1558   if (a->compressedrow.use){
1559     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1560   }
1561 
1562   A->same_nonzero = PETSC_TRUE;
1563   PetscFunctionReturn(0);
1564 }
1565 
1566 /*
1567    This function returns an array of flags which indicate the locations of contiguous
1568    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1569    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1570    Assume: sizes should be long enough to hold all the values.
1571 */
1572 #undef __FUNCT__
1573 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1574 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1575 {
1576   PetscInt   i,j,k,row;
1577   PetscTruth flg;
1578 
1579   PetscFunctionBegin;
1580   for (i=0,j=0; i<n; j++) {
1581     row = idx[i];
1582     if (row%bs!=0) { /* Not the begining of a block */
1583       sizes[j] = 1;
1584       i++;
1585     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1586       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1587       i++;
1588     } else { /* Begining of the block, so check if the complete block exists */
1589       flg = PETSC_TRUE;
1590       for (k=1; k<bs; k++) {
1591         if (row+k != idx[i+k]) { /* break in the block */
1592           flg = PETSC_FALSE;
1593           break;
1594         }
1595       }
1596       if (flg) { /* No break in the bs */
1597         sizes[j] = bs;
1598         i+= bs;
1599       } else {
1600         sizes[j] = 1;
1601         i++;
1602       }
1603     }
1604   }
1605   *bs_max = j;
1606   PetscFunctionReturn(0);
1607 }
1608 
1609 #undef __FUNCT__
1610 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1611 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1612 {
1613   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1614   PetscErrorCode ierr;
1615   PetscInt       i,j,k,count,*rows;
1616   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1617   PetscScalar    zero = 0.0;
1618   MatScalar      *aa;
1619 
1620   PetscFunctionBegin;
1621   /* Make a copy of the IS and  sort it */
1622   /* allocate memory for rows,sizes */
1623   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1624   sizes = rows + is_n;
1625 
1626   /* copy IS values to rows, and sort them */
1627   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1628   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1629   if (baij->keepzeroedrows) {
1630     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1631     bs_max = is_n;
1632     A->same_nonzero = PETSC_TRUE;
1633   } else {
1634     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1635     A->same_nonzero = PETSC_FALSE;
1636   }
1637 
1638   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1639     row   = rows[j];
1640     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1641     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1642     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1643     if (sizes[i] == bs && !baij->keepzeroedrows) {
1644       if (diag != 0.0) {
1645         if (baij->ilen[row/bs] > 0) {
1646           baij->ilen[row/bs]       = 1;
1647           baij->j[baij->i[row/bs]] = row/bs;
1648           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1649         }
1650         /* Now insert all the diagonal values for this bs */
1651         for (k=0; k<bs; k++) {
1652           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1653         }
1654       } else { /* (diag == 0.0) */
1655         baij->ilen[row/bs] = 0;
1656       } /* end (diag == 0.0) */
1657     } else { /* (sizes[i] != bs) */
1658 #if defined (PETSC_USE_DEBUG)
1659       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1660 #endif
1661       for (k=0; k<count; k++) {
1662         aa[0] =  zero;
1663         aa    += bs;
1664       }
1665       if (diag != 0.0) {
1666         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1667       }
1668     }
1669   }
1670 
1671   ierr = PetscFree(rows);CHKERRQ(ierr);
1672   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 #undef __FUNCT__
1677 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1678 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1679 {
1680   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1681   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1682   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1683   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1684   PetscErrorCode ierr;
1685   PetscInt       ridx,cidx,bs2=a->bs2;
1686   PetscTruth     roworiented=a->roworiented;
1687   MatScalar      *ap,value,*aa=a->a,*bap;
1688 
1689   PetscFunctionBegin;
1690   for (k=0; k<m; k++) { /* loop over added rows */
1691     row  = im[k];
1692     brow = row/bs;
1693     if (row < 0) continue;
1694 #if defined(PETSC_USE_DEBUG)
1695     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1696 #endif
1697     rp   = aj + ai[brow];
1698     ap   = aa + bs2*ai[brow];
1699     rmax = imax[brow];
1700     nrow = ailen[brow];
1701     low  = 0;
1702     high = nrow;
1703     for (l=0; l<n; l++) { /* loop over added columns */
1704       if (in[l] < 0) continue;
1705 #if defined(PETSC_USE_DEBUG)
1706       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1707 #endif
1708       col = in[l]; bcol = col/bs;
1709       ridx = row % bs; cidx = col % bs;
1710       if (roworiented) {
1711         value = v[l + k*n];
1712       } else {
1713         value = v[k + l*m];
1714       }
1715       if (col <= lastcol) low = 0; else high = nrow;
1716       lastcol = col;
1717       while (high-low > 7) {
1718         t = (low+high)/2;
1719         if (rp[t] > bcol) high = t;
1720         else              low  = t;
1721       }
1722       for (i=low; i<high; i++) {
1723         if (rp[i] > bcol) break;
1724         if (rp[i] == bcol) {
1725           bap  = ap +  bs2*i + bs*cidx + ridx;
1726           if (is == ADD_VALUES) *bap += value;
1727           else                  *bap  = value;
1728           goto noinsert1;
1729         }
1730       }
1731       if (nonew == 1) goto noinsert1;
1732       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1733       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1734       N = nrow++ - 1; high++;
1735       /* shift up all the later entries in this row */
1736       for (ii=N; ii>=i; ii--) {
1737         rp[ii+1] = rp[ii];
1738         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1739       }
1740       if (N>=i) {
1741         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1742       }
1743       rp[i]                      = bcol;
1744       ap[bs2*i + bs*cidx + ridx] = value;
1745       a->nz++;
1746       noinsert1:;
1747       low = i;
1748     }
1749     ailen[brow] = nrow;
1750   }
1751   A->same_nonzero = PETSC_FALSE;
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 
1756 #undef __FUNCT__
1757 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1758 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1759 {
1760   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1761   Mat            outA;
1762   PetscErrorCode ierr;
1763   PetscTruth     row_identity,col_identity;
1764 
1765   PetscFunctionBegin;
1766   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1767   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1768   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1769   if (!row_identity || !col_identity) {
1770     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1771   }
1772 
1773   outA          = inA;
1774   inA->factor   = FACTOR_LU;
1775 
1776   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1777 
1778   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1779   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1780   a->row = row;
1781   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1782   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1783   a->col = col;
1784 
1785   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1786   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1787   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1788 
1789   /*
1790       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1791       for ILU(0) factorization with natural ordering
1792   */
1793   if (inA->rmap.bs < 8) {
1794     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1795   } else {
1796     if (!a->solve_work) {
1797       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1798       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1799     }
1800   }
1801 
1802   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1803 
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 EXTERN_C_BEGIN
1808 #undef __FUNCT__
1809 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1810 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1811 {
1812   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1813   PetscInt    i,nz,nbs;
1814 
1815   PetscFunctionBegin;
1816   nz  = baij->maxnz/baij->bs2;
1817   nbs = baij->nbs;
1818   for (i=0; i<nz; i++) {
1819     baij->j[i] = indices[i];
1820   }
1821   baij->nz = nz;
1822   for (i=0; i<nbs; i++) {
1823     baij->ilen[i] = baij->imax[i];
1824   }
1825 
1826   PetscFunctionReturn(0);
1827 }
1828 EXTERN_C_END
1829 
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1832 /*@
1833     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1834        in the matrix.
1835 
1836   Input Parameters:
1837 +  mat - the SeqBAIJ matrix
1838 -  indices - the column indices
1839 
1840   Level: advanced
1841 
1842   Notes:
1843     This can be called if you have precomputed the nonzero structure of the
1844   matrix and want to provide it to the matrix object to improve the performance
1845   of the MatSetValues() operation.
1846 
1847     You MUST have set the correct numbers of nonzeros per row in the call to
1848   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1849 
1850     MUST be called before any calls to MatSetValues();
1851 
1852 @*/
1853 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1854 {
1855   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1856 
1857   PetscFunctionBegin;
1858   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1859   PetscValidPointer(indices,2);
1860   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1861   if (f) {
1862     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1863   } else {
1864     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1871 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1872 {
1873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1874   PetscErrorCode ierr;
1875   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1876   PetscReal      atmp;
1877   PetscScalar    *x,zero = 0.0;
1878   MatScalar      *aa;
1879   PetscInt       ncols,brow,krow,kcol;
1880 
1881   PetscFunctionBegin;
1882   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1883   bs   = A->rmap.bs;
1884   aa   = a->a;
1885   ai   = a->i;
1886   aj   = a->j;
1887   mbs  = a->mbs;
1888 
1889   ierr = VecSet(v,zero);CHKERRQ(ierr);
1890   if (idx) {
1891     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1892   }
1893   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1894   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1895   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1896   for (i=0; i<mbs; i++) {
1897     ncols = ai[1] - ai[0]; ai++;
1898     brow  = bs*i;
1899     for (j=0; j<ncols; j++){
1900       for (kcol=0; kcol<bs; kcol++){
1901         for (krow=0; krow<bs; krow++){
1902           atmp = PetscAbsScalar(*aa);aa++;
1903           row = brow + krow;    /* row index */
1904           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1905           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1906         }
1907       }
1908       aj++;
1909     }
1910   }
1911   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatCopy_SeqBAIJ"
1917 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1918 {
1919   PetscErrorCode ierr;
1920 
1921   PetscFunctionBegin;
1922   /* If the two matrices have the same copy implementation, use fast copy. */
1923   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1924     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1925     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1926 
1927     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1928       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1929     }
1930     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1931   } else {
1932     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1933   }
1934   PetscFunctionReturn(0);
1935 }
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1939 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1940 {
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1950 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1951 {
1952   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1953   PetscFunctionBegin;
1954   *array = a->a;
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1960 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1961 {
1962   PetscFunctionBegin;
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #include "petscblaslapack.h"
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1969 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1970 {
1971   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1972   PetscErrorCode ierr;
1973   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1974   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1975 
1976   PetscFunctionBegin;
1977   if (str == SAME_NONZERO_PATTERN) {
1978     PetscScalar alpha = a;
1979     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1980   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1981     if (y->xtoy && y->XtoY != X) {
1982       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1983       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1984     }
1985     if (!y->xtoy) { /* get xtoy */
1986       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1987       y->XtoY = X;
1988     }
1989     bs2 = bs*bs;
1990     for (i=0; i<x->nz; i++) {
1991       j = 0;
1992       while (j < bs2){
1993         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1994         j++;
1995       }
1996     }
1997     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1998   } else {
1999     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2000   }
2001   PetscFunctionReturn(0);
2002 }
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2006 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2007 {
2008   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2009   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2010   PetscScalar    *aa = a->a;
2011 
2012   PetscFunctionBegin;
2013   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2019 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2020 {
2021   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2022   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2023   PetscScalar    *aa = a->a;
2024 
2025   PetscFunctionBegin;
2026   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2027   PetscFunctionReturn(0);
2028 }
2029 
2030 
2031 /* -------------------------------------------------------------------*/
2032 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2033        MatGetRow_SeqBAIJ,
2034        MatRestoreRow_SeqBAIJ,
2035        MatMult_SeqBAIJ_N,
2036 /* 4*/ MatMultAdd_SeqBAIJ_N,
2037        MatMultTranspose_SeqBAIJ,
2038        MatMultTransposeAdd_SeqBAIJ,
2039        MatSolve_SeqBAIJ_N,
2040        0,
2041        0,
2042 /*10*/ 0,
2043        MatLUFactor_SeqBAIJ,
2044        0,
2045        0,
2046        MatTranspose_SeqBAIJ,
2047 /*15*/ MatGetInfo_SeqBAIJ,
2048        MatEqual_SeqBAIJ,
2049        MatGetDiagonal_SeqBAIJ,
2050        MatDiagonalScale_SeqBAIJ,
2051        MatNorm_SeqBAIJ,
2052 /*20*/ 0,
2053        MatAssemblyEnd_SeqBAIJ,
2054        0,
2055        MatSetOption_SeqBAIJ,
2056        MatZeroEntries_SeqBAIJ,
2057 /*25*/ MatZeroRows_SeqBAIJ,
2058        MatLUFactorSymbolic_SeqBAIJ,
2059        MatLUFactorNumeric_SeqBAIJ_N,
2060        MatCholeskyFactorSymbolic_SeqBAIJ,
2061        MatCholeskyFactorNumeric_SeqBAIJ_N,
2062 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2063        MatILUFactorSymbolic_SeqBAIJ,
2064        MatICCFactorSymbolic_SeqBAIJ,
2065        MatGetArray_SeqBAIJ,
2066        MatRestoreArray_SeqBAIJ,
2067 /*35*/ MatDuplicate_SeqBAIJ,
2068        0,
2069        0,
2070        MatILUFactor_SeqBAIJ,
2071        0,
2072 /*40*/ MatAXPY_SeqBAIJ,
2073        MatGetSubMatrices_SeqBAIJ,
2074        MatIncreaseOverlap_SeqBAIJ,
2075        MatGetValues_SeqBAIJ,
2076        MatCopy_SeqBAIJ,
2077 /*45*/ 0,
2078        MatScale_SeqBAIJ,
2079        0,
2080        0,
2081        0,
2082 /*50*/ 0,
2083        MatGetRowIJ_SeqBAIJ,
2084        MatRestoreRowIJ_SeqBAIJ,
2085        0,
2086        0,
2087 /*55*/ 0,
2088        0,
2089        0,
2090        0,
2091        MatSetValuesBlocked_SeqBAIJ,
2092 /*60*/ MatGetSubMatrix_SeqBAIJ,
2093        MatDestroy_SeqBAIJ,
2094        MatView_SeqBAIJ,
2095        0,
2096        0,
2097 /*65*/ 0,
2098        0,
2099        0,
2100        0,
2101        0,
2102 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2103        MatConvert_Basic,
2104        0,
2105        0,
2106        0,
2107 /*75*/ 0,
2108        0,
2109        0,
2110        0,
2111        0,
2112 /*80*/ 0,
2113        0,
2114        0,
2115        0,
2116        MatLoad_SeqBAIJ,
2117 /*85*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*90*/ 0,
2123        0,
2124        0,
2125        0,
2126        0,
2127 /*95*/ 0,
2128        0,
2129        0,
2130        0,
2131        0,
2132 /*100*/0,
2133        0,
2134        0,
2135        0,
2136        0,
2137 /*105*/0,
2138        MatRealPart_SeqBAIJ,
2139        MatImaginaryPart_SeqBAIJ
2140 };
2141 
2142 EXTERN_C_BEGIN
2143 #undef __FUNCT__
2144 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2145 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2146 {
2147   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2148   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2149   PetscErrorCode ierr;
2150 
2151   PetscFunctionBegin;
2152   if (aij->nonew != 1) {
2153     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2154   }
2155 
2156   /* allocate space for values if not already there */
2157   if (!aij->saved_values) {
2158     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2159   }
2160 
2161   /* copy values over */
2162   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2163   PetscFunctionReturn(0);
2164 }
2165 EXTERN_C_END
2166 
2167 EXTERN_C_BEGIN
2168 #undef __FUNCT__
2169 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2170 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2171 {
2172   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2173   PetscErrorCode ierr;
2174   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2175 
2176   PetscFunctionBegin;
2177   if (aij->nonew != 1) {
2178     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2179   }
2180   if (!aij->saved_values) {
2181     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2182   }
2183 
2184   /* copy values over */
2185   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2186   PetscFunctionReturn(0);
2187 }
2188 EXTERN_C_END
2189 
2190 EXTERN_C_BEGIN
2191 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2192 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2193 EXTERN_C_END
2194 
2195 EXTERN_C_BEGIN
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2198 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2199 {
2200   Mat_SeqBAIJ    *b;
2201   PetscErrorCode ierr;
2202   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2203   PetscTruth     flg,skipallocation = PETSC_FALSE;
2204 
2205   PetscFunctionBegin;
2206 
2207   if (nz == MAT_SKIP_ALLOCATION) {
2208     skipallocation = PETSC_TRUE;
2209     nz             = 0;
2210   }
2211 
2212   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2213     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2214   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2215 
2216   if (nnz && newbs != bs) {
2217     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2218   }
2219   bs   = newbs;
2220 
2221   B->rmap.bs = B->cmap.bs = bs;
2222   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2223   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2224 
2225   B->preallocated = PETSC_TRUE;
2226 
2227   mbs  = B->rmap.n/bs;
2228   nbs  = B->cmap.n/bs;
2229   bs2  = bs*bs;
2230 
2231   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2232     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2233   }
2234 
2235   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2236   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2237   if (nnz) {
2238     for (i=0; i<mbs; i++) {
2239       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2240       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2241     }
2242   }
2243 
2244   b       = (Mat_SeqBAIJ*)B->data;
2245   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2246     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2247   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2248 
2249   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2250   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2251   if (!flg) {
2252     switch (bs) {
2253     case 1:
2254       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2255       B->ops->mult            = MatMult_SeqBAIJ_1;
2256       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2257       break;
2258     case 2:
2259       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2260       B->ops->mult            = MatMult_SeqBAIJ_2;
2261       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2262       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2263       break;
2264     case 3:
2265       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2266       B->ops->mult            = MatMult_SeqBAIJ_3;
2267       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2268       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2269       break;
2270     case 4:
2271       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2272       B->ops->mult            = MatMult_SeqBAIJ_4;
2273       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2274       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2275       break;
2276     case 5:
2277       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2278       B->ops->mult            = MatMult_SeqBAIJ_5;
2279       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2280       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2281       break;
2282     case 6:
2283       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2284       B->ops->mult            = MatMult_SeqBAIJ_6;
2285       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2286       break;
2287     case 7:
2288       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2289       B->ops->mult            = MatMult_SeqBAIJ_7;
2290       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2291       break;
2292     default:
2293       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2294       B->ops->mult            = MatMult_SeqBAIJ_N;
2295       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2296       break;
2297     }
2298   }
2299   B->rmap.bs      = bs;
2300   b->mbs     = mbs;
2301   b->nbs     = nbs;
2302   if (!skipallocation) {
2303     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2304     /* b->ilen will count nonzeros in each block row so far. */
2305     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2306     if (!nnz) {
2307       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2308       else if (nz <= 0)        nz = 1;
2309       for (i=0; i<mbs; i++) b->imax[i] = nz;
2310       nz = nz*mbs;
2311     } else {
2312       nz = 0;
2313       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2314     }
2315 
2316     /* allocate the matrix space */
2317     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2318     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2319     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2320     b->singlemalloc = PETSC_TRUE;
2321 
2322     b->i[0] = 0;
2323     for (i=1; i<mbs+1; i++) {
2324       b->i[i] = b->i[i-1] + b->imax[i-1];
2325     }
2326     b->free_a     = PETSC_TRUE;
2327     b->free_ij    = PETSC_TRUE;
2328   } else {
2329     b->free_a     = PETSC_FALSE;
2330     b->free_ij    = PETSC_FALSE;
2331   }
2332 
2333   B->rmap.bs          = bs;
2334   b->bs2              = bs2;
2335   b->mbs              = mbs;
2336   b->nz               = 0;
2337   b->maxnz            = nz*bs2;
2338   B->info.nz_unneeded = (PetscReal)b->maxnz;
2339   PetscFunctionReturn(0);
2340 }
2341 EXTERN_C_END
2342 
2343 /*MC
2344    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2345    block sparse compressed row format.
2346 
2347    Options Database Keys:
2348 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2349 
2350   Level: beginner
2351 
2352 .seealso: MatCreateSeqBAIJ()
2353 M*/
2354 
2355 EXTERN_C_BEGIN
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatCreate_SeqBAIJ"
2358 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2359 {
2360   PetscErrorCode ierr;
2361   PetscMPIInt    size;
2362   Mat_SeqBAIJ    *b;
2363 
2364   PetscFunctionBegin;
2365   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2366   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2367 
2368   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2369   B->data = (void*)b;
2370   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2371   B->factor           = 0;
2372   B->mapping          = 0;
2373   b->row              = 0;
2374   b->col              = 0;
2375   b->icol             = 0;
2376   b->reallocs         = 0;
2377   b->saved_values     = 0;
2378 #if defined(PETSC_USE_MAT_SINGLE)
2379   b->setvalueslen     = 0;
2380   b->setvaluescopy    = PETSC_NULL;
2381 #endif
2382 
2383   b->roworiented      = PETSC_TRUE;
2384   b->nonew            = 0;
2385   b->diag             = 0;
2386   b->solve_work       = 0;
2387   b->mult_work        = 0;
2388   B->spptr            = 0;
2389   B->info.nz_unneeded = (PetscReal)b->maxnz;
2390   b->keepzeroedrows   = PETSC_FALSE;
2391   b->xtoy              = 0;
2392   b->XtoY              = 0;
2393   b->compressedrow.use     = PETSC_FALSE;
2394   b->compressedrow.nrows   = 0;
2395   b->compressedrow.i       = PETSC_NULL;
2396   b->compressedrow.rindex  = PETSC_NULL;
2397   b->compressedrow.checked = PETSC_FALSE;
2398   B->same_nonzero          = PETSC_FALSE;
2399 
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2401                                      "MatInvertBlockDiagonal_SeqBAIJ",
2402                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2404                                      "MatStoreValues_SeqBAIJ",
2405                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2407                                      "MatRetrieveValues_SeqBAIJ",
2408                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2410                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2411                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2413                                      "MatConvert_SeqBAIJ_SeqAIJ",
2414                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2416                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2417                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2419                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2420                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2421   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 EXTERN_C_END
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2429 {
2430   Mat            C;
2431   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2432   PetscErrorCode ierr;
2433   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2434 
2435   PetscFunctionBegin;
2436   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2437 
2438   *B = 0;
2439   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2440   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2441   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2442   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2443   c    = (Mat_SeqBAIJ*)C->data;
2444 
2445   C->rmap.N   = A->rmap.N;
2446   C->cmap.N   = A->cmap.N;
2447   C->rmap.bs  = A->rmap.bs;
2448   c->bs2 = a->bs2;
2449   c->mbs = a->mbs;
2450   c->nbs = a->nbs;
2451 
2452   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2453   for (i=0; i<mbs; i++) {
2454     c->imax[i] = a->imax[i];
2455     c->ilen[i] = a->ilen[i];
2456   }
2457 
2458   /* allocate the matrix space */
2459   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2460   c->singlemalloc = PETSC_TRUE;
2461   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2462   if (mbs > 0) {
2463     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2464     if (cpvalues == MAT_COPY_VALUES) {
2465       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2466     } else {
2467       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2468     }
2469   }
2470   c->roworiented = a->roworiented;
2471   c->nonew       = a->nonew;
2472 
2473   if (a->diag) {
2474     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2475     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2476     for (i=0; i<mbs; i++) {
2477       c->diag[i] = a->diag[i];
2478     }
2479   } else c->diag        = 0;
2480   c->nz                 = a->nz;
2481   c->maxnz              = a->maxnz;
2482   c->solve_work         = 0;
2483   c->mult_work          = 0;
2484   c->free_a             = PETSC_TRUE;
2485   c->free_ij            = PETSC_TRUE;
2486   C->preallocated       = PETSC_TRUE;
2487   C->assembled          = PETSC_TRUE;
2488 
2489   c->compressedrow.use     = a->compressedrow.use;
2490   c->compressedrow.nrows   = a->compressedrow.nrows;
2491   c->compressedrow.checked = a->compressedrow.checked;
2492   if ( a->compressedrow.checked && a->compressedrow.use){
2493     i = a->compressedrow.nrows;
2494     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2495     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2496     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2497     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2498   } else {
2499     c->compressedrow.use    = PETSC_FALSE;
2500     c->compressedrow.i      = PETSC_NULL;
2501     c->compressedrow.rindex = PETSC_NULL;
2502   }
2503   C->same_nonzero = A->same_nonzero;
2504   *B = C;
2505   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2506   PetscFunctionReturn(0);
2507 }
2508 
2509 #undef __FUNCT__
2510 #define __FUNCT__ "MatLoad_SeqBAIJ"
2511 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2512 {
2513   Mat_SeqBAIJ    *a;
2514   Mat            B;
2515   PetscErrorCode ierr;
2516   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2517   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2518   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2519   PetscInt       *masked,nmask,tmp,bs2,ishift;
2520   PetscMPIInt    size;
2521   int            fd;
2522   PetscScalar    *aa;
2523   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2524 
2525   PetscFunctionBegin;
2526   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2527     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2528   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2529   bs2  = bs*bs;
2530 
2531   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2532   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2533   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2534   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2535   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2536   M = header[1]; N = header[2]; nz = header[3];
2537 
2538   if (header[3] < 0) {
2539     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2540   }
2541 
2542   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2543 
2544   /*
2545      This code adds extra rows to make sure the number of rows is
2546     divisible by the blocksize
2547   */
2548   mbs        = M/bs;
2549   extra_rows = bs - M + bs*(mbs);
2550   if (extra_rows == bs) extra_rows = 0;
2551   else                  mbs++;
2552   if (extra_rows) {
2553     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2554   }
2555 
2556   /* read in row lengths */
2557   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2558   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2559   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2560 
2561   /* read in column indices */
2562   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2563   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2564   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2565 
2566   /* loop over row lengths determining block row lengths */
2567   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2568   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2569   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2570   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2571   masked   = mask + mbs;
2572   rowcount = 0; nzcount = 0;
2573   for (i=0; i<mbs; i++) {
2574     nmask = 0;
2575     for (j=0; j<bs; j++) {
2576       kmax = rowlengths[rowcount];
2577       for (k=0; k<kmax; k++) {
2578         tmp = jj[nzcount++]/bs;
2579         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2580       }
2581       rowcount++;
2582     }
2583     browlengths[i] += nmask;
2584     /* zero out the mask elements we set */
2585     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2586   }
2587 
2588   /* create our matrix */
2589   ierr = MatCreate(comm,&B);
2590   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2591   ierr = MatSetType(B,type);CHKERRQ(ierr);
2592   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2593   a = (Mat_SeqBAIJ*)B->data;
2594 
2595   /* set matrix "i" values */
2596   a->i[0] = 0;
2597   for (i=1; i<= mbs; i++) {
2598     a->i[i]      = a->i[i-1] + browlengths[i-1];
2599     a->ilen[i-1] = browlengths[i-1];
2600   }
2601   a->nz         = 0;
2602   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2603 
2604   /* read in nonzero values */
2605   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2606   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2607   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2608 
2609   /* set "a" and "j" values into matrix */
2610   nzcount = 0; jcount = 0;
2611   for (i=0; i<mbs; i++) {
2612     nzcountb = nzcount;
2613     nmask    = 0;
2614     for (j=0; j<bs; j++) {
2615       kmax = rowlengths[i*bs+j];
2616       for (k=0; k<kmax; k++) {
2617         tmp = jj[nzcount++]/bs;
2618 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2619       }
2620     }
2621     /* sort the masked values */
2622     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2623 
2624     /* set "j" values into matrix */
2625     maskcount = 1;
2626     for (j=0; j<nmask; j++) {
2627       a->j[jcount++]  = masked[j];
2628       mask[masked[j]] = maskcount++;
2629     }
2630     /* set "a" values into matrix */
2631     ishift = bs2*a->i[i];
2632     for (j=0; j<bs; j++) {
2633       kmax = rowlengths[i*bs+j];
2634       for (k=0; k<kmax; k++) {
2635         tmp       = jj[nzcountb]/bs ;
2636         block     = mask[tmp] - 1;
2637         point     = jj[nzcountb] - bs*tmp;
2638         idx       = ishift + bs2*block + j + bs*point;
2639         a->a[idx] = (MatScalar)aa[nzcountb++];
2640       }
2641     }
2642     /* zero out the mask elements we set */
2643     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2644   }
2645   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2646 
2647   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2648   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2649   ierr = PetscFree(aa);CHKERRQ(ierr);
2650   ierr = PetscFree(jj);CHKERRQ(ierr);
2651   ierr = PetscFree(mask);CHKERRQ(ierr);
2652 
2653   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2655   ierr = MatView_Private(B);CHKERRQ(ierr);
2656 
2657   *A = B;
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatCreateSeqBAIJ"
2663 /*@C
2664    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2665    compressed row) format.  For good matrix assembly performance the
2666    user should preallocate the matrix storage by setting the parameter nz
2667    (or the array nnz).  By setting these parameters accurately, performance
2668    during matrix assembly can be increased by more than a factor of 50.
2669 
2670    Collective on MPI_Comm
2671 
2672    Input Parameters:
2673 +  comm - MPI communicator, set to PETSC_COMM_SELF
2674 .  bs - size of block
2675 .  m - number of rows
2676 .  n - number of columns
2677 .  nz - number of nonzero blocks  per block row (same for all rows)
2678 -  nnz - array containing the number of nonzero blocks in the various block rows
2679          (possibly different for each block row) or PETSC_NULL
2680 
2681    Output Parameter:
2682 .  A - the matrix
2683 
2684    Options Database Keys:
2685 .   -mat_no_unroll - uses code that does not unroll the loops in the
2686                      block calculations (much slower)
2687 .    -mat_block_size - size of the blocks to use
2688 
2689    Level: intermediate
2690 
2691    Notes:
2692    The number of rows and columns must be divisible by blocksize.
2693 
2694    If the nnz parameter is given then the nz parameter is ignored
2695 
2696    A nonzero block is any block that as 1 or more nonzeros in it
2697 
2698    The block AIJ format is fully compatible with standard Fortran 77
2699    storage.  That is, the stored row and column indices can begin at
2700    either one (as in Fortran) or zero.  See the users' manual for details.
2701 
2702    Specify the preallocated storage with either nz or nnz (not both).
2703    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2704    allocation.  For additional details, see the users manual chapter on
2705    matrices.
2706 
2707 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2708 @*/
2709 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2710 {
2711   PetscErrorCode ierr;
2712 
2713   PetscFunctionBegin;
2714   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2715   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2716   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2717   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2723 /*@C
2724    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2725    per row in the matrix. For good matrix assembly performance the
2726    user should preallocate the matrix storage by setting the parameter nz
2727    (or the array nnz).  By setting these parameters accurately, performance
2728    during matrix assembly can be increased by more than a factor of 50.
2729 
2730    Collective on MPI_Comm
2731 
2732    Input Parameters:
2733 +  A - the matrix
2734 .  bs - size of block
2735 .  nz - number of block nonzeros per block row (same for all rows)
2736 -  nnz - array containing the number of block nonzeros in the various block rows
2737          (possibly different for each block row) or PETSC_NULL
2738 
2739    Options Database Keys:
2740 .   -mat_no_unroll - uses code that does not unroll the loops in the
2741                      block calculations (much slower)
2742 .    -mat_block_size - size of the blocks to use
2743 
2744    Level: intermediate
2745 
2746    Notes:
2747    If the nnz parameter is given then the nz parameter is ignored
2748 
2749    The block AIJ format is fully compatible with standard Fortran 77
2750    storage.  That is, the stored row and column indices can begin at
2751    either one (as in Fortran) or zero.  See the users' manual for details.
2752 
2753    Specify the preallocated storage with either nz or nnz (not both).
2754    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2755    allocation.  For additional details, see the users manual chapter on
2756    matrices.
2757 
2758 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2759 @*/
2760 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2761 {
2762   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2763 
2764   PetscFunctionBegin;
2765   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2766   if (f) {
2767     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2768   }
2769   PetscFunctionReturn(0);
2770 }
2771 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2774 /*@
2775      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2776               (upper triangular entries in CSR format) provided by the user.
2777 
2778      Collective on MPI_Comm
2779 
2780    Input Parameters:
2781 +  comm - must be an MPI communicator of size 1
2782 .  bs - size of block
2783 .  m - number of rows
2784 .  n - number of columns
2785 .  i - row indices
2786 .  j - column indices
2787 -  a - matrix values
2788 
2789    Output Parameter:
2790 .  mat - the matrix
2791 
2792    Level: intermediate
2793 
2794    Notes:
2795        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2796     once the matrix is destroyed
2797 
2798        You cannot set new nonzero locations into this matrix, that will generate an error.
2799 
2800        The i and j indices are 0 based
2801 
2802 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2803 
2804 @*/
2805 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2806 {
2807   PetscErrorCode ierr;
2808   PetscInt       ii;
2809   Mat_SeqBAIJ    *baij;
2810 
2811   PetscFunctionBegin;
2812   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2813   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2814 
2815   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2816   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2817   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2818   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2819   baij = (Mat_SeqBAIJ*)(*mat)->data;
2820   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2821 
2822   baij->i = i;
2823   baij->j = j;
2824   baij->a = a;
2825   baij->singlemalloc = PETSC_FALSE;
2826   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2827   baij->free_a       = PETSC_FALSE;
2828   baij->free_ij       = PETSC_FALSE;
2829 
2830   for (ii=0; ii<m; ii++) {
2831     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2832 #if defined(PETSC_USE_DEBUG)
2833     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2834 #endif
2835   }
2836 #if defined(PETSC_USE_DEBUG)
2837   for (ii=0; ii<baij->i[m]; ii++) {
2838     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2839     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2840   }
2841 #endif
2842 
2843   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845   PetscFunctionReturn(0);
2846 }
2847