xref: /petsc/src/mat/impls/baij/seq/baij.c (revision dbbb2e4e304796ec221bf0d5ec5bdf6f874ca77c)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt       n = a->mbs,i;
785 
786   PetscFunctionBegin;
787   *nn = n;
788   if (!ia) PetscFunctionReturn(0);
789   if (symmetric) {
790     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
791   } else if (oshift == 1) {
792     /* temporarily add 1 to i and j indices */
793     PetscInt nz = a->i[n];
794     for (i=0; i<nz; i++) a->j[i]++;
795     for (i=0; i<n+1; i++) a->i[i]++;
796     *ia = a->i; *ja = a->j;
797   } else {
798     *ia = a->i; *ja = a->j;
799   }
800 
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807 {
808   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
809   PetscErrorCode ierr;
810   PetscInt       i,n = a->mbs;
811 
812   PetscFunctionBegin;
813   if (!ia) PetscFunctionReturn(0);
814   if (symmetric) {
815     ierr = PetscFree(*ia);CHKERRQ(ierr);
816     ierr = PetscFree(*ja);CHKERRQ(ierr);
817   } else if (oshift == 1) {
818     PetscInt nz = a->i[n]-1;
819     for (i=0; i<nz; i++) a->j[i]--;
820     for (i=0; i<n+1; i++) a->i[i]--;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_SeqBAIJ"
827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835 #endif
836   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(a->diag);CHKERRQ(ierr);
844   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
845   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
846   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
847   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
850 #if defined(PETSC_USE_MAT_SINGLE)
851   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
852 #endif
853   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatSetOption_SeqBAIJ"
870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
871 {
872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   switch (op) {
877   case MAT_ROW_ORIENTED:
878     a->roworiented    = PETSC_TRUE;
879     break;
880   case MAT_COLUMN_ORIENTED:
881     a->roworiented    = PETSC_FALSE;
882     break;
883   case MAT_COLUMNS_SORTED:
884     a->sorted         = PETSC_TRUE;
885     break;
886   case MAT_COLUMNS_UNSORTED:
887     a->sorted         = PETSC_FALSE;
888     break;
889   case MAT_KEEP_ZEROED_ROWS:
890     a->keepzeroedrows = PETSC_TRUE;
891     break;
892   case MAT_NO_NEW_NONZERO_LOCATIONS:
893     a->nonew          = 1;
894     break;
895   case MAT_NEW_NONZERO_LOCATION_ERR:
896     a->nonew          = -1;
897     break;
898   case MAT_NEW_NONZERO_ALLOCATION_ERR:
899     a->nonew          = -2;
900     break;
901   case MAT_YES_NEW_NONZERO_LOCATIONS:
902     a->nonew          = 0;
903     break;
904   case MAT_ROWS_SORTED:
905   case MAT_ROWS_UNSORTED:
906   case MAT_YES_NEW_DIAGONALS:
907   case MAT_IGNORE_OFF_PROC_ENTRIES:
908   case MAT_USE_HASH_TABLE:
909     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
910     break;
911   case MAT_NO_NEW_DIAGONALS:
912     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
913   case MAT_SYMMETRIC:
914   case MAT_STRUCTURALLY_SYMMETRIC:
915   case MAT_NOT_SYMMETRIC:
916   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
917   case MAT_HERMITIAN:
918   case MAT_NOT_HERMITIAN:
919   case MAT_SYMMETRY_ETERNAL:
920   case MAT_NOT_SYMMETRY_ETERNAL:
921     break;
922   default:
923     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
924   }
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatGetRow_SeqBAIJ"
930 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
931 {
932   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
933   PetscErrorCode ierr;
934   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
935   MatScalar      *aa,*aa_i;
936   PetscScalar    *v_i;
937 
938   PetscFunctionBegin;
939   bs  = A->rmap.bs;
940   ai  = a->i;
941   aj  = a->j;
942   aa  = a->a;
943   bs2 = a->bs2;
944 
945   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
946 
947   bn  = row/bs;   /* Block number */
948   bp  = row % bs; /* Block Position */
949   M   = ai[bn+1] - ai[bn];
950   *nz = bs*M;
951 
952   if (v) {
953     *v = 0;
954     if (*nz) {
955       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
956       for (i=0; i<M; i++) { /* for each block in the block row */
957         v_i  = *v + i*bs;
958         aa_i = aa + bs2*(ai[bn] + i);
959         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
960       }
961     }
962   }
963 
964   if (idx) {
965     *idx = 0;
966     if (*nz) {
967       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
968       for (i=0; i<M; i++) { /* for each block in the block row */
969         idx_i = *idx + i*bs;
970         itmp  = bs*aj[ai[bn] + i];
971         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
972       }
973     }
974   }
975   PetscFunctionReturn(0);
976 }
977 
978 #undef __FUNCT__
979 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
980 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
981 {
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
986   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatTranspose_SeqBAIJ"
992 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
993 {
994   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
995   Mat            C;
996   PetscErrorCode ierr;
997   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
998   PetscInt       *rows,*cols,bs2=a->bs2;
999   PetscScalar    *array;
1000 
1001   PetscFunctionBegin;
1002   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1003   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1004   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1005 
1006 #if defined(PETSC_USE_MAT_SINGLE)
1007   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1008   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1009 #else
1010   array = a->a;
1011 #endif
1012 
1013   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1014   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1015   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1016   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1017   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1018   ierr = PetscFree(col);CHKERRQ(ierr);
1019   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1020   cols = rows + bs;
1021   for (i=0; i<mbs; i++) {
1022     cols[0] = i*bs;
1023     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1024     len = ai[i+1] - ai[i];
1025     for (j=0; j<len; j++) {
1026       rows[0] = (*aj++)*bs;
1027       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1028       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1029       array += bs2;
1030     }
1031   }
1032   ierr = PetscFree(rows);CHKERRQ(ierr);
1033 #if defined(PETSC_USE_MAT_SINGLE)
1034   ierr = PetscFree(array);
1035 #endif
1036 
1037   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1038   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039 
1040   if (B) {
1041     *B = C;
1042   } else {
1043     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1044   }
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1050 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1051 {
1052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1053   PetscErrorCode ierr;
1054   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1055   int            fd;
1056   PetscScalar    *aa;
1057   FILE           *file;
1058 
1059   PetscFunctionBegin;
1060   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1061   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1062   col_lens[0] = MAT_FILE_COOKIE;
1063 
1064   col_lens[1] = A->rmap.N;
1065   col_lens[2] = A->cmap.n;
1066   col_lens[3] = a->nz*bs2;
1067 
1068   /* store lengths of each row and write (including header) to file */
1069   count = 0;
1070   for (i=0; i<a->mbs; i++) {
1071     for (j=0; j<bs; j++) {
1072       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1073     }
1074   }
1075   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1076   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1077 
1078   /* store column indices (zero start index) */
1079   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1080   count = 0;
1081   for (i=0; i<a->mbs; i++) {
1082     for (j=0; j<bs; j++) {
1083       for (k=a->i[i]; k<a->i[i+1]; k++) {
1084         for (l=0; l<bs; l++) {
1085           jj[count++] = bs*a->j[k] + l;
1086         }
1087       }
1088     }
1089   }
1090   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1091   ierr = PetscFree(jj);CHKERRQ(ierr);
1092 
1093   /* store nonzero values */
1094   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1095   count = 0;
1096   for (i=0; i<a->mbs; i++) {
1097     for (j=0; j<bs; j++) {
1098       for (k=a->i[i]; k<a->i[i+1]; k++) {
1099         for (l=0; l<bs; l++) {
1100           aa[count++] = a->a[bs2*k + l*bs + j];
1101         }
1102       }
1103     }
1104   }
1105   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1106   ierr = PetscFree(aa);CHKERRQ(ierr);
1107 
1108   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1109   if (file) {
1110     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1111   }
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1117 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1118 {
1119   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1120   PetscErrorCode    ierr;
1121   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1122   PetscViewerFormat format;
1123 
1124   PetscFunctionBegin;
1125   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1126   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1127     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1128   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1129     Mat aij;
1130     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1131     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1132     ierr = MatDestroy(aij);CHKERRQ(ierr);
1133   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1134      PetscFunctionReturn(0);
1135   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1136     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1137     for (i=0; i<a->mbs; i++) {
1138       for (j=0; j<bs; j++) {
1139         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1140         for (k=a->i[i]; k<a->i[i+1]; k++) {
1141           for (l=0; l<bs; l++) {
1142 #if defined(PETSC_USE_COMPLEX)
1143             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1144               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1145                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1146             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1147               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1148                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1149             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1150               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             }
1152 #else
1153             if (a->a[bs2*k + l*bs + j] != 0.0) {
1154               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1155             }
1156 #endif
1157           }
1158         }
1159         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1160       }
1161     }
1162     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1163   } else {
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1165     for (i=0; i<a->mbs; i++) {
1166       for (j=0; j<bs; j++) {
1167         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1168         for (k=a->i[i]; k<a->i[i+1]; k++) {
1169           for (l=0; l<bs; l++) {
1170 #if defined(PETSC_USE_COMPLEX)
1171             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1172               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1173                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1174             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1175               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1176                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1177             } else {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             }
1180 #else
1181             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1182 #endif
1183           }
1184         }
1185         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1186       }
1187     }
1188     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1189   }
1190   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1191   PetscFunctionReturn(0);
1192 }
1193 
1194 #undef __FUNCT__
1195 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1196 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1197 {
1198   Mat            A = (Mat) Aa;
1199   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1202   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1203   MatScalar      *aa;
1204   PetscViewer    viewer;
1205 
1206   PetscFunctionBegin;
1207 
1208   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1209   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1210 
1211   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1212 
1213   /* loop over matrix elements drawing boxes */
1214   color = PETSC_DRAW_BLUE;
1215   for (i=0,row=0; i<mbs; i++,row+=bs) {
1216     for (j=a->i[i]; j<a->i[i+1]; j++) {
1217       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1218       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1219       aa = a->a + j*bs2;
1220       for (k=0; k<bs; k++) {
1221         for (l=0; l<bs; l++) {
1222           if (PetscRealPart(*aa++) >=  0.) continue;
1223           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1224         }
1225       }
1226     }
1227   }
1228   color = PETSC_DRAW_CYAN;
1229   for (i=0,row=0; i<mbs; i++,row+=bs) {
1230     for (j=a->i[i]; j<a->i[i+1]; j++) {
1231       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1232       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1233       aa = a->a + j*bs2;
1234       for (k=0; k<bs; k++) {
1235         for (l=0; l<bs; l++) {
1236           if (PetscRealPart(*aa++) != 0.) continue;
1237           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1238         }
1239       }
1240     }
1241   }
1242 
1243   color = PETSC_DRAW_RED;
1244   for (i=0,row=0; i<mbs; i++,row+=bs) {
1245     for (j=a->i[i]; j<a->i[i+1]; j++) {
1246       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1247       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1248       aa = a->a + j*bs2;
1249       for (k=0; k<bs; k++) {
1250         for (l=0; l<bs; l++) {
1251           if (PetscRealPart(*aa++) <= 0.) continue;
1252           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1253         }
1254       }
1255     }
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1262 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1263 {
1264   PetscErrorCode ierr;
1265   PetscReal      xl,yl,xr,yr,w,h;
1266   PetscDraw      draw;
1267   PetscTruth     isnull;
1268 
1269   PetscFunctionBegin;
1270 
1271   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1272   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1273 
1274   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1275   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1276   xr += w;    yr += h;  xl = -w;     yl = -h;
1277   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1278   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1279   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1280   PetscFunctionReturn(0);
1281 }
1282 
1283 #undef __FUNCT__
1284 #define __FUNCT__ "MatView_SeqBAIJ"
1285 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1286 {
1287   PetscErrorCode ierr;
1288   PetscTruth     iascii,isbinary,isdraw;
1289 
1290   PetscFunctionBegin;
1291   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1294   if (iascii){
1295     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1296   } else if (isbinary) {
1297     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1298   } else if (isdraw) {
1299     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1300   } else {
1301     Mat B;
1302     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1303     ierr = MatView(B,viewer);CHKERRQ(ierr);
1304     ierr = MatDestroy(B);CHKERRQ(ierr);
1305   }
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1312 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1313 {
1314   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1315   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1316   PetscInt    *ai = a->i,*ailen = a->ilen;
1317   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1318   MatScalar   *ap,*aa = a->a,zero = 0.0;
1319 
1320   PetscFunctionBegin;
1321   for (k=0; k<m; k++) { /* loop over rows */
1322     row  = im[k]; brow = row/bs;
1323     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1324     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1325     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1326     nrow = ailen[brow];
1327     for (l=0; l<n; l++) { /* loop over columns */
1328       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1329       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1330       col  = in[l] ;
1331       bcol = col/bs;
1332       cidx = col%bs;
1333       ridx = row%bs;
1334       high = nrow;
1335       low  = 0; /* assume unsorted */
1336       while (high-low > 5) {
1337         t = (low+high)/2;
1338         if (rp[t] > bcol) high = t;
1339         else             low  = t;
1340       }
1341       for (i=low; i<high; i++) {
1342         if (rp[i] > bcol) break;
1343         if (rp[i] == bcol) {
1344           *v++ = ap[bs2*i+bs*cidx+ridx];
1345           goto finished;
1346         }
1347       }
1348       *v++ = zero;
1349       finished:;
1350     }
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #if defined(PETSC_USE_MAT_SINGLE)
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1358 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1359 {
1360   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1361   PetscErrorCode ierr;
1362   PetscInt       i,N = m*n*b->bs2;
1363   MatScalar      *vsingle;
1364 
1365   PetscFunctionBegin;
1366   if (N > b->setvalueslen) {
1367     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1368     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1369     b->setvalueslen  = N;
1370   }
1371   vsingle = b->setvaluescopy;
1372   for (i=0; i<N; i++) {
1373     vsingle[i] = v[i];
1374   }
1375   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1376   PetscFunctionReturn(0);
1377 }
1378 #endif
1379 
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1383 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1384 {
1385   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1386   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1387   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1388   PetscErrorCode  ierr;
1389   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1390   PetscTruth      roworiented=a->roworiented;
1391   const MatScalar *value = v;
1392   MatScalar       *ap,*aa = a->a,*bap;
1393 
1394   PetscFunctionBegin;
1395   if (roworiented) {
1396     stepval = (n-1)*bs;
1397   } else {
1398     stepval = (m-1)*bs;
1399   }
1400   for (k=0; k<m; k++) { /* loop over added rows */
1401     row  = im[k];
1402     if (row < 0) continue;
1403 #if defined(PETSC_USE_DEBUG)
1404     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1405 #endif
1406     rp   = aj + ai[row];
1407     ap   = aa + bs2*ai[row];
1408     rmax = imax[row];
1409     nrow = ailen[row];
1410     low  = 0;
1411     high = nrow;
1412     for (l=0; l<n; l++) { /* loop over added columns */
1413       if (in[l] < 0) continue;
1414 #if defined(PETSC_USE_DEBUG)
1415       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1416 #endif
1417       col = in[l];
1418       if (roworiented) {
1419         value = v + k*(stepval+bs)*bs + l*bs;
1420       } else {
1421         value = v + l*(stepval+bs)*bs + k*bs;
1422       }
1423       if (col <= lastcol) low = 0; else high = nrow;
1424       lastcol = col;
1425       while (high-low > 7) {
1426         t = (low+high)/2;
1427         if (rp[t] > col) high = t;
1428         else             low  = t;
1429       }
1430       for (i=low; i<high; i++) {
1431         if (rp[i] > col) break;
1432         if (rp[i] == col) {
1433           bap  = ap +  bs2*i;
1434           if (roworiented) {
1435             if (is == ADD_VALUES) {
1436               for (ii=0; ii<bs; ii++,value+=stepval) {
1437                 for (jj=ii; jj<bs2; jj+=bs) {
1438                   bap[jj] += *value++;
1439                 }
1440               }
1441             } else {
1442               for (ii=0; ii<bs; ii++,value+=stepval) {
1443                 for (jj=ii; jj<bs2; jj+=bs) {
1444                   bap[jj] = *value++;
1445                 }
1446               }
1447             }
1448           } else {
1449             if (is == ADD_VALUES) {
1450               for (ii=0; ii<bs; ii++,value+=stepval) {
1451                 for (jj=0; jj<bs; jj++) {
1452                   *bap++ += *value++;
1453                 }
1454               }
1455             } else {
1456               for (ii=0; ii<bs; ii++,value+=stepval) {
1457                 for (jj=0; jj<bs; jj++) {
1458                   *bap++  = *value++;
1459                 }
1460               }
1461             }
1462           }
1463           goto noinsert2;
1464         }
1465       }
1466       if (nonew == 1) goto noinsert2;
1467       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1468       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew);
1469       N = nrow++ - 1; high++;
1470       /* shift up all the later entries in this row */
1471       for (ii=N; ii>=i; ii--) {
1472         rp[ii+1] = rp[ii];
1473         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1474       }
1475       if (N >= i) {
1476         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1477       }
1478       rp[i] = col;
1479       bap   = ap +  bs2*i;
1480       if (roworiented) {
1481         for (ii=0; ii<bs; ii++,value+=stepval) {
1482           for (jj=ii; jj<bs2; jj+=bs) {
1483             bap[jj] = *value++;
1484           }
1485         }
1486       } else {
1487         for (ii=0; ii<bs; ii++,value+=stepval) {
1488           for (jj=0; jj<bs; jj++) {
1489             *bap++  = *value++;
1490           }
1491         }
1492       }
1493       noinsert2:;
1494       low = i;
1495     }
1496     ailen[row] = nrow;
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1503 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1504 {
1505   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1506   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1507   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1508   PetscErrorCode ierr;
1509   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1510   MatScalar      *aa = a->a,*ap;
1511   PetscReal      ratio=0.6;
1512 
1513   PetscFunctionBegin;
1514   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1515 
1516   if (m) rmax = ailen[0];
1517   for (i=1; i<mbs; i++) {
1518     /* move each row back by the amount of empty slots (fshift) before it*/
1519     fshift += imax[i-1] - ailen[i-1];
1520     rmax   = PetscMax(rmax,ailen[i]);
1521     if (fshift) {
1522       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1523       N = ailen[i];
1524       for (j=0; j<N; j++) {
1525         ip[j-fshift] = ip[j];
1526         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1527       }
1528     }
1529     ai[i] = ai[i-1] + ailen[i-1];
1530   }
1531   if (mbs) {
1532     fshift += imax[mbs-1] - ailen[mbs-1];
1533     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1534   }
1535   /* reset ilen and imax for each row */
1536   for (i=0; i<mbs; i++) {
1537     ailen[i] = imax[i] = ai[i+1] - ai[i];
1538   }
1539   a->nz = ai[mbs];
1540 
1541   /* diagonals may have moved, so kill the diagonal pointers */
1542   a->idiagvalid = PETSC_FALSE;
1543   if (fshift && a->diag) {
1544     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1545     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1546     a->diag = 0;
1547   }
1548   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1549   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1550   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1551   a->reallocs          = 0;
1552   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1553 
1554   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1555   if (a->compressedrow.use){
1556     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1557   }
1558 
1559   A->same_nonzero = PETSC_TRUE;
1560   PetscFunctionReturn(0);
1561 }
1562 
1563 /*
1564    This function returns an array of flags which indicate the locations of contiguous
1565    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1566    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1567    Assume: sizes should be long enough to hold all the values.
1568 */
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1571 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1572 {
1573   PetscInt   i,j,k,row;
1574   PetscTruth flg;
1575 
1576   PetscFunctionBegin;
1577   for (i=0,j=0; i<n; j++) {
1578     row = idx[i];
1579     if (row%bs!=0) { /* Not the begining of a block */
1580       sizes[j] = 1;
1581       i++;
1582     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1583       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1584       i++;
1585     } else { /* Begining of the block, so check if the complete block exists */
1586       flg = PETSC_TRUE;
1587       for (k=1; k<bs; k++) {
1588         if (row+k != idx[i+k]) { /* break in the block */
1589           flg = PETSC_FALSE;
1590           break;
1591         }
1592       }
1593       if (flg) { /* No break in the bs */
1594         sizes[j] = bs;
1595         i+= bs;
1596       } else {
1597         sizes[j] = 1;
1598         i++;
1599       }
1600     }
1601   }
1602   *bs_max = j;
1603   PetscFunctionReturn(0);
1604 }
1605 
1606 #undef __FUNCT__
1607 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1608 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1609 {
1610   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1611   PetscErrorCode ierr;
1612   PetscInt       i,j,k,count,*rows;
1613   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1614   PetscScalar    zero = 0.0;
1615   MatScalar      *aa;
1616 
1617   PetscFunctionBegin;
1618   /* Make a copy of the IS and  sort it */
1619   /* allocate memory for rows,sizes */
1620   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1621   sizes = rows + is_n;
1622 
1623   /* copy IS values to rows, and sort them */
1624   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1625   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1626   if (baij->keepzeroedrows) {
1627     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1628     bs_max = is_n;
1629     A->same_nonzero = PETSC_TRUE;
1630   } else {
1631     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1632     A->same_nonzero = PETSC_FALSE;
1633   }
1634 
1635   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1636     row   = rows[j];
1637     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1638     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1639     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1640     if (sizes[i] == bs && !baij->keepzeroedrows) {
1641       if (diag != 0.0) {
1642         if (baij->ilen[row/bs] > 0) {
1643           baij->ilen[row/bs]       = 1;
1644           baij->j[baij->i[row/bs]] = row/bs;
1645           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1646         }
1647         /* Now insert all the diagonal values for this bs */
1648         for (k=0; k<bs; k++) {
1649           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1650         }
1651       } else { /* (diag == 0.0) */
1652         baij->ilen[row/bs] = 0;
1653       } /* end (diag == 0.0) */
1654     } else { /* (sizes[i] != bs) */
1655 #if defined (PETSC_USE_DEBUG)
1656       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1657 #endif
1658       for (k=0; k<count; k++) {
1659         aa[0] =  zero;
1660         aa    += bs;
1661       }
1662       if (diag != 0.0) {
1663         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1664       }
1665     }
1666   }
1667 
1668   ierr = PetscFree(rows);CHKERRQ(ierr);
1669   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1675 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1676 {
1677   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1678   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1679   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1680   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1681   PetscErrorCode ierr;
1682   PetscInt       ridx,cidx,bs2=a->bs2;
1683   PetscTruth     roworiented=a->roworiented;
1684   MatScalar      *ap,value,*aa=a->a,*bap;
1685 
1686   PetscFunctionBegin;
1687   for (k=0; k<m; k++) { /* loop over added rows */
1688     row  = im[k];
1689     brow = row/bs;
1690     if (row < 0) continue;
1691 #if defined(PETSC_USE_DEBUG)
1692     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1693 #endif
1694     rp   = aj + ai[brow];
1695     ap   = aa + bs2*ai[brow];
1696     rmax = imax[brow];
1697     nrow = ailen[brow];
1698     low  = 0;
1699     high = nrow;
1700     for (l=0; l<n; l++) { /* loop over added columns */
1701       if (in[l] < 0) continue;
1702 #if defined(PETSC_USE_DEBUG)
1703       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1704 #endif
1705       col = in[l]; bcol = col/bs;
1706       ridx = row % bs; cidx = col % bs;
1707       if (roworiented) {
1708         value = v[l + k*n];
1709       } else {
1710         value = v[k + l*m];
1711       }
1712       if (col <= lastcol) low = 0; else high = nrow;
1713       lastcol = col;
1714       while (high-low > 7) {
1715         t = (low+high)/2;
1716         if (rp[t] > bcol) high = t;
1717         else              low  = t;
1718       }
1719       for (i=low; i<high; i++) {
1720         if (rp[i] > bcol) break;
1721         if (rp[i] == bcol) {
1722           bap  = ap +  bs2*i + bs*cidx + ridx;
1723           if (is == ADD_VALUES) *bap += value;
1724           else                  *bap  = value;
1725           goto noinsert1;
1726         }
1727       }
1728       if (nonew == 1) goto noinsert1;
1729       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1730       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew);
1731       N = nrow++ - 1; high++;
1732       /* shift up all the later entries in this row */
1733       for (ii=N; ii>=i; ii--) {
1734         rp[ii+1] = rp[ii];
1735         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1736       }
1737       if (N>=i) {
1738         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1739       }
1740       rp[i]                      = bcol;
1741       ap[bs2*i + bs*cidx + ridx] = value;
1742       a->nz++;
1743       noinsert1:;
1744       low = i;
1745     }
1746     ailen[brow] = nrow;
1747   }
1748   A->same_nonzero = PETSC_FALSE;
1749   PetscFunctionReturn(0);
1750 }
1751 
1752 
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1755 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1756 {
1757   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1758   Mat            outA;
1759   PetscErrorCode ierr;
1760   PetscTruth     row_identity,col_identity;
1761 
1762   PetscFunctionBegin;
1763   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1764   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1765   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1766   if (!row_identity || !col_identity) {
1767     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1768   }
1769 
1770   outA          = inA;
1771   inA->factor   = FACTOR_LU;
1772 
1773   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1774 
1775   a->row        = row;
1776   a->col        = col;
1777   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1778   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1779 
1780   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1781   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1782   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1783 
1784   /*
1785       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1786       for ILU(0) factorization with natural ordering
1787   */
1788   if (inA->rmap.bs < 8) {
1789     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1790   } else {
1791     if (!a->solve_work) {
1792       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1793       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1794     }
1795   }
1796 
1797   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1798 
1799   PetscFunctionReturn(0);
1800 }
1801 
1802 EXTERN_C_BEGIN
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1805 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1806 {
1807   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1808   PetscInt    i,nz,nbs;
1809 
1810   PetscFunctionBegin;
1811   nz  = baij->maxnz/baij->bs2;
1812   nbs = baij->nbs;
1813   for (i=0; i<nz; i++) {
1814     baij->j[i] = indices[i];
1815   }
1816   baij->nz = nz;
1817   for (i=0; i<nbs; i++) {
1818     baij->ilen[i] = baij->imax[i];
1819   }
1820 
1821   PetscFunctionReturn(0);
1822 }
1823 EXTERN_C_END
1824 
1825 #undef __FUNCT__
1826 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1827 /*@
1828     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1829        in the matrix.
1830 
1831   Input Parameters:
1832 +  mat - the SeqBAIJ matrix
1833 -  indices - the column indices
1834 
1835   Level: advanced
1836 
1837   Notes:
1838     This can be called if you have precomputed the nonzero structure of the
1839   matrix and want to provide it to the matrix object to improve the performance
1840   of the MatSetValues() operation.
1841 
1842     You MUST have set the correct numbers of nonzeros per row in the call to
1843   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1844 
1845     MUST be called before any calls to MatSetValues();
1846 
1847 @*/
1848 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1849 {
1850   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1851 
1852   PetscFunctionBegin;
1853   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1854   PetscValidPointer(indices,2);
1855   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1856   if (f) {
1857     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1858   } else {
1859     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1860   }
1861   PetscFunctionReturn(0);
1862 }
1863 
1864 #undef __FUNCT__
1865 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1866 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1867 {
1868   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1869   PetscErrorCode ierr;
1870   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1871   PetscReal      atmp;
1872   PetscScalar    *x,zero = 0.0;
1873   MatScalar      *aa;
1874   PetscInt       ncols,brow,krow,kcol;
1875 
1876   PetscFunctionBegin;
1877   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1878   bs   = A->rmap.bs;
1879   aa   = a->a;
1880   ai   = a->i;
1881   aj   = a->j;
1882   mbs = a->mbs;
1883 
1884   ierr = VecSet(v,zero);CHKERRQ(ierr);
1885   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1886   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1887   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1888   for (i=0; i<mbs; i++) {
1889     ncols = ai[1] - ai[0]; ai++;
1890     brow  = bs*i;
1891     for (j=0; j<ncols; j++){
1892       /* bcol = bs*(*aj); */
1893       for (kcol=0; kcol<bs; kcol++){
1894         for (krow=0; krow<bs; krow++){
1895           atmp = PetscAbsScalar(*aa); aa++;
1896           row = brow + krow;    /* row index */
1897           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1898           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1899         }
1900       }
1901       aj++;
1902     }
1903   }
1904   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatCopy_SeqBAIJ"
1910 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1911 {
1912   PetscErrorCode ierr;
1913 
1914   PetscFunctionBegin;
1915   /* If the two matrices have the same copy implementation, use fast copy. */
1916   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1917     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1918     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1919 
1920     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1921       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1922     }
1923     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1924   } else {
1925     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1926   }
1927   PetscFunctionReturn(0);
1928 }
1929 
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1932 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1933 {
1934   PetscErrorCode ierr;
1935 
1936   PetscFunctionBegin;
1937   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1938   PetscFunctionReturn(0);
1939 }
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1943 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1944 {
1945   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1946   PetscFunctionBegin;
1947   *array = a->a;
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1953 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1954 {
1955   PetscFunctionBegin;
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #include "petscblaslapack.h"
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1962 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1963 {
1964   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1965   PetscErrorCode ierr;
1966   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1967   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1968 
1969   PetscFunctionBegin;
1970   if (str == SAME_NONZERO_PATTERN) {
1971     PetscScalar alpha = a;
1972     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1973   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1974     if (y->xtoy && y->XtoY != X) {
1975       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1976       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1977     }
1978     if (!y->xtoy) { /* get xtoy */
1979       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1980       y->XtoY = X;
1981     }
1982     bs2 = bs*bs;
1983     for (i=0; i<x->nz; i++) {
1984       j = 0;
1985       while (j < bs2){
1986         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1987         j++;
1988       }
1989     }
1990     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1991   } else {
1992     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatRealPart_SeqBAIJ"
1999 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2000 {
2001   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2002   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2003   PetscScalar    *aa = a->a;
2004 
2005   PetscFunctionBegin;
2006   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2007   PetscFunctionReturn(0);
2008 }
2009 
2010 #undef __FUNCT__
2011 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2012 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2013 {
2014   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2015   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2016   PetscScalar    *aa = a->a;
2017 
2018   PetscFunctionBegin;
2019   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 
2024 /* -------------------------------------------------------------------*/
2025 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2026        MatGetRow_SeqBAIJ,
2027        MatRestoreRow_SeqBAIJ,
2028        MatMult_SeqBAIJ_N,
2029 /* 4*/ MatMultAdd_SeqBAIJ_N,
2030        MatMultTranspose_SeqBAIJ,
2031        MatMultTransposeAdd_SeqBAIJ,
2032        MatSolve_SeqBAIJ_N,
2033        0,
2034        0,
2035 /*10*/ 0,
2036        MatLUFactor_SeqBAIJ,
2037        0,
2038        0,
2039        MatTranspose_SeqBAIJ,
2040 /*15*/ MatGetInfo_SeqBAIJ,
2041        MatEqual_SeqBAIJ,
2042        MatGetDiagonal_SeqBAIJ,
2043        MatDiagonalScale_SeqBAIJ,
2044        MatNorm_SeqBAIJ,
2045 /*20*/ 0,
2046        MatAssemblyEnd_SeqBAIJ,
2047        0,
2048        MatSetOption_SeqBAIJ,
2049        MatZeroEntries_SeqBAIJ,
2050 /*25*/ MatZeroRows_SeqBAIJ,
2051        MatLUFactorSymbolic_SeqBAIJ,
2052        MatLUFactorNumeric_SeqBAIJ_N,
2053        MatCholeskyFactorSymbolic_SeqBAIJ,
2054        MatCholeskyFactorNumeric_SeqBAIJ_N,
2055 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2056        MatILUFactorSymbolic_SeqBAIJ,
2057        MatICCFactorSymbolic_SeqBAIJ,
2058        MatGetArray_SeqBAIJ,
2059        MatRestoreArray_SeqBAIJ,
2060 /*35*/ MatDuplicate_SeqBAIJ,
2061        0,
2062        0,
2063        MatILUFactor_SeqBAIJ,
2064        0,
2065 /*40*/ MatAXPY_SeqBAIJ,
2066        MatGetSubMatrices_SeqBAIJ,
2067        MatIncreaseOverlap_SeqBAIJ,
2068        MatGetValues_SeqBAIJ,
2069        MatCopy_SeqBAIJ,
2070 /*45*/ 0,
2071        MatScale_SeqBAIJ,
2072        0,
2073        0,
2074        0,
2075 /*50*/ 0,
2076        MatGetRowIJ_SeqBAIJ,
2077        MatRestoreRowIJ_SeqBAIJ,
2078        0,
2079        0,
2080 /*55*/ 0,
2081        0,
2082        0,
2083        0,
2084        MatSetValuesBlocked_SeqBAIJ,
2085 /*60*/ MatGetSubMatrix_SeqBAIJ,
2086        MatDestroy_SeqBAIJ,
2087        MatView_SeqBAIJ,
2088        0,
2089        0,
2090 /*65*/ 0,
2091        0,
2092        0,
2093        0,
2094        0,
2095 /*70*/ MatGetRowMax_SeqBAIJ,
2096        MatConvert_Basic,
2097        0,
2098        0,
2099        0,
2100 /*75*/ 0,
2101        0,
2102        0,
2103        0,
2104        0,
2105 /*80*/ 0,
2106        0,
2107        0,
2108        0,
2109        MatLoad_SeqBAIJ,
2110 /*85*/ 0,
2111        0,
2112        0,
2113        0,
2114        0,
2115 /*90*/ 0,
2116        0,
2117        0,
2118        0,
2119        0,
2120 /*95*/ 0,
2121        0,
2122        0,
2123        0,
2124        0,
2125 /*100*/0,
2126        0,
2127        0,
2128        0,
2129        0,
2130 /*105*/0,
2131        MatRealPart_SeqBAIJ,
2132        MatImaginaryPart_SeqBAIJ
2133 };
2134 
2135 EXTERN_C_BEGIN
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2138 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2139 {
2140   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2141   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2142   PetscErrorCode ierr;
2143 
2144   PetscFunctionBegin;
2145   if (aij->nonew != 1) {
2146     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2147   }
2148 
2149   /* allocate space for values if not already there */
2150   if (!aij->saved_values) {
2151     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2152   }
2153 
2154   /* copy values over */
2155   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 EXTERN_C_END
2159 
2160 EXTERN_C_BEGIN
2161 #undef __FUNCT__
2162 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2163 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2164 {
2165   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2166   PetscErrorCode ierr;
2167   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2168 
2169   PetscFunctionBegin;
2170   if (aij->nonew != 1) {
2171     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2172   }
2173   if (!aij->saved_values) {
2174     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2175   }
2176 
2177   /* copy values over */
2178   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2179   PetscFunctionReturn(0);
2180 }
2181 EXTERN_C_END
2182 
2183 EXTERN_C_BEGIN
2184 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2186 EXTERN_C_END
2187 
2188 EXTERN_C_BEGIN
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2191 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2192 {
2193   Mat_SeqBAIJ    *b;
2194   PetscErrorCode ierr;
2195   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2196   PetscTruth     flg,skipallocation = PETSC_FALSE;
2197 
2198   PetscFunctionBegin;
2199 
2200   if (nz == MAT_SKIP_ALLOCATION) {
2201     skipallocation = PETSC_TRUE;
2202     nz             = 0;
2203   }
2204 
2205   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2206     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2207   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2208 
2209   if (nnz && newbs != bs) {
2210     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2211   }
2212   bs   = newbs;
2213 
2214   B->rmap.bs = B->cmap.bs = bs;
2215   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2216   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2217 
2218   B->preallocated = PETSC_TRUE;
2219 
2220   mbs  = B->rmap.n/bs;
2221   nbs  = B->cmap.n/bs;
2222   bs2  = bs*bs;
2223 
2224   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2225     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2226   }
2227 
2228   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2229   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2230   if (nnz) {
2231     for (i=0; i<mbs; i++) {
2232       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2233       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2234     }
2235   }
2236 
2237   b       = (Mat_SeqBAIJ*)B->data;
2238   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2239     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2240   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2241 
2242   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2243   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2244   if (!flg) {
2245     switch (bs) {
2246     case 1:
2247       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2248       B->ops->mult            = MatMult_SeqBAIJ_1;
2249       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2250       break;
2251     case 2:
2252       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2253       B->ops->mult            = MatMult_SeqBAIJ_2;
2254       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2255       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2256       break;
2257     case 3:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2259       B->ops->mult            = MatMult_SeqBAIJ_3;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2261       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2262       break;
2263     case 4:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2265       B->ops->mult            = MatMult_SeqBAIJ_4;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2268       break;
2269     case 5:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2271       B->ops->mult            = MatMult_SeqBAIJ_5;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2274       break;
2275     case 6:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2277       B->ops->mult            = MatMult_SeqBAIJ_6;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2279       break;
2280     case 7:
2281       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2282       B->ops->mult            = MatMult_SeqBAIJ_7;
2283       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2284       break;
2285     default:
2286       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2287       B->ops->mult            = MatMult_SeqBAIJ_N;
2288       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2289       break;
2290     }
2291   }
2292   B->rmap.bs      = bs;
2293   b->mbs     = mbs;
2294   b->nbs     = nbs;
2295   if (!skipallocation) {
2296     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2297     /* b->ilen will count nonzeros in each block row so far. */
2298     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2299     if (!nnz) {
2300       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2301       else if (nz <= 0)        nz = 1;
2302       for (i=0; i<mbs; i++) b->imax[i] = nz;
2303       nz = nz*mbs;
2304     } else {
2305       nz = 0;
2306       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2307     }
2308 
2309     /* allocate the matrix space */
2310     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2311     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2312     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2313     b->singlemalloc = PETSC_TRUE;
2314 
2315     b->i[0] = 0;
2316     for (i=1; i<mbs+1; i++) {
2317       b->i[i] = b->i[i-1] + b->imax[i-1];
2318     }
2319     b->free_a     = PETSC_TRUE;
2320     b->free_ij    = PETSC_TRUE;
2321   } else {
2322     b->free_a     = PETSC_FALSE;
2323     b->free_ij    = PETSC_FALSE;
2324   }
2325 
2326   B->rmap.bs          = bs;
2327   b->bs2              = bs2;
2328   b->mbs              = mbs;
2329   b->nz               = 0;
2330   b->maxnz            = nz*bs2;
2331   B->info.nz_unneeded = (PetscReal)b->maxnz;
2332   PetscFunctionReturn(0);
2333 }
2334 EXTERN_C_END
2335 
2336 /*MC
2337    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2338    block sparse compressed row format.
2339 
2340    Options Database Keys:
2341 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2342 
2343   Level: beginner
2344 
2345 .seealso: MatCreateSeqBAIJ()
2346 M*/
2347 
2348 EXTERN_C_BEGIN
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatCreate_SeqBAIJ"
2351 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2352 {
2353   PetscErrorCode ierr;
2354   PetscMPIInt    size;
2355   Mat_SeqBAIJ    *b;
2356 
2357   PetscFunctionBegin;
2358   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2359   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2360 
2361   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2362   B->data = (void*)b;
2363   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2364   B->factor           = 0;
2365   B->mapping          = 0;
2366   b->row              = 0;
2367   b->col              = 0;
2368   b->icol             = 0;
2369   b->reallocs         = 0;
2370   b->saved_values     = 0;
2371 #if defined(PETSC_USE_MAT_SINGLE)
2372   b->setvalueslen     = 0;
2373   b->setvaluescopy    = PETSC_NULL;
2374 #endif
2375 
2376   b->sorted           = PETSC_FALSE;
2377   b->roworiented      = PETSC_TRUE;
2378   b->nonew            = 0;
2379   b->diag             = 0;
2380   b->solve_work       = 0;
2381   b->mult_work        = 0;
2382   B->spptr            = 0;
2383   B->info.nz_unneeded = (PetscReal)b->maxnz;
2384   b->keepzeroedrows   = PETSC_FALSE;
2385   b->xtoy              = 0;
2386   b->XtoY              = 0;
2387   b->compressedrow.use     = PETSC_FALSE;
2388   b->compressedrow.nrows   = 0;
2389   b->compressedrow.i       = PETSC_NULL;
2390   b->compressedrow.rindex  = PETSC_NULL;
2391   b->compressedrow.checked = PETSC_FALSE;
2392   B->same_nonzero          = PETSC_FALSE;
2393 
2394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2395                                      "MatInvertBlockDiagonal_SeqBAIJ",
2396                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2398                                      "MatStoreValues_SeqBAIJ",
2399                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2401                                      "MatRetrieveValues_SeqBAIJ",
2402                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2404                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2405                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2407                                      "MatConvert_SeqBAIJ_SeqAIJ",
2408                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2410                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2411                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2413                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2414                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2416   PetscFunctionReturn(0);
2417 }
2418 EXTERN_C_END
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2422 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2423 {
2424   Mat            C;
2425   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2426   PetscErrorCode ierr;
2427   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2428 
2429   PetscFunctionBegin;
2430   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2431 
2432   *B = 0;
2433   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2434   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2435   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2436   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2437   c    = (Mat_SeqBAIJ*)C->data;
2438 
2439   C->rmap.N   = A->rmap.N;
2440   C->cmap.N   = A->cmap.N;
2441   C->rmap.bs  = A->rmap.bs;
2442   c->bs2 = a->bs2;
2443   c->mbs = a->mbs;
2444   c->nbs = a->nbs;
2445 
2446   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2447   for (i=0; i<mbs; i++) {
2448     c->imax[i] = a->imax[i];
2449     c->ilen[i] = a->ilen[i];
2450   }
2451 
2452   /* allocate the matrix space */
2453   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2454   c->singlemalloc = PETSC_TRUE;
2455   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2456   if (mbs > 0) {
2457     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2458     if (cpvalues == MAT_COPY_VALUES) {
2459       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2460     } else {
2461       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2462     }
2463   }
2464   c->sorted      = a->sorted;
2465   c->roworiented = a->roworiented;
2466   c->nonew       = a->nonew;
2467 
2468   if (a->diag) {
2469     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2470     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2471     for (i=0; i<mbs; i++) {
2472       c->diag[i] = a->diag[i];
2473     }
2474   } else c->diag        = 0;
2475   c->nz                 = a->nz;
2476   c->maxnz              = a->maxnz;
2477   c->solve_work         = 0;
2478   c->mult_work          = 0;
2479   c->free_a             = PETSC_TRUE;
2480   c->free_ij            = PETSC_TRUE;
2481   C->preallocated       = PETSC_TRUE;
2482   C->assembled          = PETSC_TRUE;
2483 
2484   c->compressedrow.use     = a->compressedrow.use;
2485   c->compressedrow.nrows   = a->compressedrow.nrows;
2486   c->compressedrow.checked = a->compressedrow.checked;
2487   if ( a->compressedrow.checked && a->compressedrow.use){
2488     i = a->compressedrow.nrows;
2489     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2490     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2491     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2492     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2493   } else {
2494     c->compressedrow.use    = PETSC_FALSE;
2495     c->compressedrow.i      = PETSC_NULL;
2496     c->compressedrow.rindex = PETSC_NULL;
2497   }
2498   C->same_nonzero = A->same_nonzero;
2499   *B = C;
2500   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatLoad_SeqBAIJ"
2506 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2507 {
2508   Mat_SeqBAIJ    *a;
2509   Mat            B;
2510   PetscErrorCode ierr;
2511   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2512   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2513   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2514   PetscInt       *masked,nmask,tmp,bs2,ishift;
2515   PetscMPIInt    size;
2516   int            fd;
2517   PetscScalar    *aa;
2518   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2519 
2520   PetscFunctionBegin;
2521   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2522     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2523   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2524   bs2  = bs*bs;
2525 
2526   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2527   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2528   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2529   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2530   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2531   M = header[1]; N = header[2]; nz = header[3];
2532 
2533   if (header[3] < 0) {
2534     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2535   }
2536 
2537   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2538 
2539   /*
2540      This code adds extra rows to make sure the number of rows is
2541     divisible by the blocksize
2542   */
2543   mbs        = M/bs;
2544   extra_rows = bs - M + bs*(mbs);
2545   if (extra_rows == bs) extra_rows = 0;
2546   else                  mbs++;
2547   if (extra_rows) {
2548     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2549   }
2550 
2551   /* read in row lengths */
2552   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2553   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2554   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2555 
2556   /* read in column indices */
2557   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2558   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2559   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2560 
2561   /* loop over row lengths determining block row lengths */
2562   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2563   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2564   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2565   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2566   masked   = mask + mbs;
2567   rowcount = 0; nzcount = 0;
2568   for (i=0; i<mbs; i++) {
2569     nmask = 0;
2570     for (j=0; j<bs; j++) {
2571       kmax = rowlengths[rowcount];
2572       for (k=0; k<kmax; k++) {
2573         tmp = jj[nzcount++]/bs;
2574         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2575       }
2576       rowcount++;
2577     }
2578     browlengths[i] += nmask;
2579     /* zero out the mask elements we set */
2580     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2581   }
2582 
2583   /* create our matrix */
2584   ierr = MatCreate(comm,&B);
2585   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2586   ierr = MatSetType(B,type);CHKERRQ(ierr);
2587   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2588   a = (Mat_SeqBAIJ*)B->data;
2589 
2590   /* set matrix "i" values */
2591   a->i[0] = 0;
2592   for (i=1; i<= mbs; i++) {
2593     a->i[i]      = a->i[i-1] + browlengths[i-1];
2594     a->ilen[i-1] = browlengths[i-1];
2595   }
2596   a->nz         = 0;
2597   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2598 
2599   /* read in nonzero values */
2600   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2601   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2602   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2603 
2604   /* set "a" and "j" values into matrix */
2605   nzcount = 0; jcount = 0;
2606   for (i=0; i<mbs; i++) {
2607     nzcountb = nzcount;
2608     nmask    = 0;
2609     for (j=0; j<bs; j++) {
2610       kmax = rowlengths[i*bs+j];
2611       for (k=0; k<kmax; k++) {
2612         tmp = jj[nzcount++]/bs;
2613 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2614       }
2615     }
2616     /* sort the masked values */
2617     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2618 
2619     /* set "j" values into matrix */
2620     maskcount = 1;
2621     for (j=0; j<nmask; j++) {
2622       a->j[jcount++]  = masked[j];
2623       mask[masked[j]] = maskcount++;
2624     }
2625     /* set "a" values into matrix */
2626     ishift = bs2*a->i[i];
2627     for (j=0; j<bs; j++) {
2628       kmax = rowlengths[i*bs+j];
2629       for (k=0; k<kmax; k++) {
2630         tmp       = jj[nzcountb]/bs ;
2631         block     = mask[tmp] - 1;
2632         point     = jj[nzcountb] - bs*tmp;
2633         idx       = ishift + bs2*block + j + bs*point;
2634         a->a[idx] = (MatScalar)aa[nzcountb++];
2635       }
2636     }
2637     /* zero out the mask elements we set */
2638     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2639   }
2640   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2641 
2642   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2643   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2644   ierr = PetscFree(aa);CHKERRQ(ierr);
2645   ierr = PetscFree(jj);CHKERRQ(ierr);
2646   ierr = PetscFree(mask);CHKERRQ(ierr);
2647 
2648   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2649   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatView_Private(B);CHKERRQ(ierr);
2651 
2652   *A = B;
2653   PetscFunctionReturn(0);
2654 }
2655 
2656 #undef __FUNCT__
2657 #define __FUNCT__ "MatCreateSeqBAIJ"
2658 /*@C
2659    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2660    compressed row) format.  For good matrix assembly performance the
2661    user should preallocate the matrix storage by setting the parameter nz
2662    (or the array nnz).  By setting these parameters accurately, performance
2663    during matrix assembly can be increased by more than a factor of 50.
2664 
2665    Collective on MPI_Comm
2666 
2667    Input Parameters:
2668 +  comm - MPI communicator, set to PETSC_COMM_SELF
2669 .  bs - size of block
2670 .  m - number of rows
2671 .  n - number of columns
2672 .  nz - number of nonzero blocks  per block row (same for all rows)
2673 -  nnz - array containing the number of nonzero blocks in the various block rows
2674          (possibly different for each block row) or PETSC_NULL
2675 
2676    Output Parameter:
2677 .  A - the matrix
2678 
2679    Options Database Keys:
2680 .   -mat_no_unroll - uses code that does not unroll the loops in the
2681                      block calculations (much slower)
2682 .    -mat_block_size - size of the blocks to use
2683 
2684    Level: intermediate
2685 
2686    Notes:
2687    The number of rows and columns must be divisible by blocksize.
2688 
2689    If the nnz parameter is given then the nz parameter is ignored
2690 
2691    A nonzero block is any block that as 1 or more nonzeros in it
2692 
2693    The block AIJ format is fully compatible with standard Fortran 77
2694    storage.  That is, the stored row and column indices can begin at
2695    either one (as in Fortran) or zero.  See the users' manual for details.
2696 
2697    Specify the preallocated storage with either nz or nnz (not both).
2698    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2699    allocation.  For additional details, see the users manual chapter on
2700    matrices.
2701 
2702 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2703 @*/
2704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2705 {
2706   PetscErrorCode ierr;
2707 
2708   PetscFunctionBegin;
2709   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2710   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2711   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2712   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2718 /*@C
2719    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2720    per row in the matrix. For good matrix assembly performance the
2721    user should preallocate the matrix storage by setting the parameter nz
2722    (or the array nnz).  By setting these parameters accurately, performance
2723    during matrix assembly can be increased by more than a factor of 50.
2724 
2725    Collective on MPI_Comm
2726 
2727    Input Parameters:
2728 +  A - the matrix
2729 .  bs - size of block
2730 .  nz - number of block nonzeros per block row (same for all rows)
2731 -  nnz - array containing the number of block nonzeros in the various block rows
2732          (possibly different for each block row) or PETSC_NULL
2733 
2734    Options Database Keys:
2735 .   -mat_no_unroll - uses code that does not unroll the loops in the
2736                      block calculations (much slower)
2737 .    -mat_block_size - size of the blocks to use
2738 
2739    Level: intermediate
2740 
2741    Notes:
2742    If the nnz parameter is given then the nz parameter is ignored
2743 
2744    The block AIJ format is fully compatible with standard Fortran 77
2745    storage.  That is, the stored row and column indices can begin at
2746    either one (as in Fortran) or zero.  See the users' manual for details.
2747 
2748    Specify the preallocated storage with either nz or nnz (not both).
2749    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2750    allocation.  For additional details, see the users manual chapter on
2751    matrices.
2752 
2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2754 @*/
2755 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2756 {
2757   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2758 
2759   PetscFunctionBegin;
2760   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2761   if (f) {
2762     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2763   }
2764   PetscFunctionReturn(0);
2765 }
2766 
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2769 /*@
2770      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2771               (upper triangular entries in CSR format) provided by the user.
2772 
2773      Collective on MPI_Comm
2774 
2775    Input Parameters:
2776 +  comm - must be an MPI communicator of size 1
2777 .  bs - size of block
2778 .  m - number of rows
2779 .  n - number of columns
2780 .  i - row indices
2781 .  j - column indices
2782 -  a - matrix values
2783 
2784    Output Parameter:
2785 .  mat - the matrix
2786 
2787    Level: intermediate
2788 
2789    Notes:
2790        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2791     once the matrix is destroyed
2792 
2793        You cannot set new nonzero locations into this matrix, that will generate an error.
2794 
2795        The i and j indices are 0 based
2796 
2797 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2798 
2799 @*/
2800 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2801 {
2802   PetscErrorCode ierr;
2803   PetscInt       ii;
2804   Mat_SeqBAIJ    *baij;
2805 
2806   PetscFunctionBegin;
2807   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2808   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2809 
2810   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2811   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2812   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2813   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2814   baij = (Mat_SeqBAIJ*)(*mat)->data;
2815   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2816 
2817   baij->i = i;
2818   baij->j = j;
2819   baij->a = a;
2820   baij->singlemalloc = PETSC_FALSE;
2821   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2822   baij->free_a       = PETSC_FALSE;
2823   baij->free_ij       = PETSC_FALSE;
2824 
2825   for (ii=0; ii<m; ii++) {
2826     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2827 #if defined(PETSC_USE_DEBUG)
2828     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2829 #endif
2830   }
2831 #if defined(PETSC_USE_DEBUG)
2832   for (ii=0; ii<baij->i[m]; ii++) {
2833     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2834     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2835   }
2836 #endif
2837 
2838   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2839   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840   PetscFunctionReturn(0);
2841 }
2842