xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 71f87433c2d5ca8a2055e7bac4e3ff8159d57028)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt       n = a->mbs,i;
785 
786   PetscFunctionBegin;
787   *nn = n;
788   if (!ia) PetscFunctionReturn(0);
789   if (symmetric) {
790     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
791   } else if (oshift == 1) {
792     /* temporarily add 1 to i and j indices */
793     PetscInt nz = a->i[n];
794     for (i=0; i<nz; i++) a->j[i]++;
795     for (i=0; i<n+1; i++) a->i[i]++;
796     *ia = a->i; *ja = a->j;
797   } else {
798     *ia = a->i; *ja = a->j;
799   }
800 
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807 {
808   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
809   PetscErrorCode ierr;
810   PetscInt       i,n = a->mbs;
811 
812   PetscFunctionBegin;
813   if (!ia) PetscFunctionReturn(0);
814   if (symmetric) {
815     ierr = PetscFree(*ia);CHKERRQ(ierr);
816     ierr = PetscFree(*ja);CHKERRQ(ierr);
817   } else if (oshift == 1) {
818     PetscInt nz = a->i[n]-1;
819     for (i=0; i<nz; i++) a->j[i]--;
820     for (i=0; i<n+1; i++) a->i[i]--;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_SeqBAIJ"
827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835 #endif
836   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(a->diag);CHKERRQ(ierr);
844   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
845   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
846   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
847   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
850 #if defined(PETSC_USE_MAT_SINGLE)
851   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
852 #endif
853   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetOption_SeqBAIJ"
871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872 {
873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   switch (op) {
878   case MAT_ROW_ORIENTED:
879     a->roworiented    = PETSC_TRUE;
880     break;
881   case MAT_COLUMN_ORIENTED:
882     a->roworiented    = PETSC_FALSE;
883     break;
884   case MAT_COLUMNS_SORTED:
885     a->sorted         = PETSC_TRUE;
886     break;
887   case MAT_COLUMNS_UNSORTED:
888     a->sorted         = PETSC_FALSE;
889     break;
890   case MAT_KEEP_ZEROED_ROWS:
891     a->keepzeroedrows = PETSC_TRUE;
892     break;
893   case MAT_NO_NEW_NONZERO_LOCATIONS:
894     a->nonew          = 1;
895     break;
896   case MAT_NEW_NONZERO_LOCATION_ERR:
897     a->nonew          = -1;
898     break;
899   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900     a->nonew          = -2;
901     break;
902   case MAT_YES_NEW_NONZERO_LOCATIONS:
903     a->nonew          = 0;
904     break;
905   case MAT_ROWS_SORTED:
906   case MAT_ROWS_UNSORTED:
907   case MAT_YES_NEW_DIAGONALS:
908   case MAT_IGNORE_OFF_PROC_ENTRIES:
909   case MAT_USE_HASH_TABLE:
910     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
911     break;
912   case MAT_NO_NEW_DIAGONALS:
913     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914   case MAT_SYMMETRIC:
915   case MAT_STRUCTURALLY_SYMMETRIC:
916   case MAT_NOT_SYMMETRIC:
917   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918   case MAT_HERMITIAN:
919   case MAT_NOT_HERMITIAN:
920   case MAT_SYMMETRY_ETERNAL:
921   case MAT_NOT_SYMMETRY_ETERNAL:
922     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
923     break;
924   default:
925     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
926   }
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatGetRow_SeqBAIJ"
932 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
933 {
934   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
935   PetscErrorCode ierr;
936   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
937   MatScalar      *aa,*aa_i;
938   PetscScalar    *v_i;
939 
940   PetscFunctionBegin;
941   bs  = A->rmap.bs;
942   ai  = a->i;
943   aj  = a->j;
944   aa  = a->a;
945   bs2 = a->bs2;
946 
947   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
948 
949   bn  = row/bs;   /* Block number */
950   bp  = row % bs; /* Block Position */
951   M   = ai[bn+1] - ai[bn];
952   *nz = bs*M;
953 
954   if (v) {
955     *v = 0;
956     if (*nz) {
957       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
958       for (i=0; i<M; i++) { /* for each block in the block row */
959         v_i  = *v + i*bs;
960         aa_i = aa + bs2*(ai[bn] + i);
961         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
962       }
963     }
964   }
965 
966   if (idx) {
967     *idx = 0;
968     if (*nz) {
969       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
970       for (i=0; i<M; i++) { /* for each block in the block row */
971         idx_i = *idx + i*bs;
972         itmp  = bs*aj[ai[bn] + i];
973         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
974       }
975     }
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
982 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
983 {
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
988   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatTranspose_SeqBAIJ"
994 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
995 {
996   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
997   Mat            C;
998   PetscErrorCode ierr;
999   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1000   PetscInt       *rows,*cols,bs2=a->bs2;
1001   PetscScalar    *array;
1002 
1003   PetscFunctionBegin;
1004   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1005   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1006   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1007 
1008 #if defined(PETSC_USE_MAT_SINGLE)
1009   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1010   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1011 #else
1012   array = a->a;
1013 #endif
1014 
1015   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1016   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1017   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1018   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1019   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1020   ierr = PetscFree(col);CHKERRQ(ierr);
1021   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1022   cols = rows + bs;
1023   for (i=0; i<mbs; i++) {
1024     cols[0] = i*bs;
1025     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1026     len = ai[i+1] - ai[i];
1027     for (j=0; j<len; j++) {
1028       rows[0] = (*aj++)*bs;
1029       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1030       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1031       array += bs2;
1032     }
1033   }
1034   ierr = PetscFree(rows);CHKERRQ(ierr);
1035 #if defined(PETSC_USE_MAT_SINGLE)
1036   ierr = PetscFree(array);
1037 #endif
1038 
1039   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041 
1042   if (B) {
1043     *B = C;
1044   } else {
1045     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1046   }
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1052 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1057   int            fd;
1058   PetscScalar    *aa;
1059   FILE           *file;
1060 
1061   PetscFunctionBegin;
1062   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1063   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1064   col_lens[0] = MAT_FILE_COOKIE;
1065 
1066   col_lens[1] = A->rmap.N;
1067   col_lens[2] = A->cmap.n;
1068   col_lens[3] = a->nz*bs2;
1069 
1070   /* store lengths of each row and write (including header) to file */
1071   count = 0;
1072   for (i=0; i<a->mbs; i++) {
1073     for (j=0; j<bs; j++) {
1074       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1075     }
1076   }
1077   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1078   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1079 
1080   /* store column indices (zero start index) */
1081   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1082   count = 0;
1083   for (i=0; i<a->mbs; i++) {
1084     for (j=0; j<bs; j++) {
1085       for (k=a->i[i]; k<a->i[i+1]; k++) {
1086         for (l=0; l<bs; l++) {
1087           jj[count++] = bs*a->j[k] + l;
1088         }
1089       }
1090     }
1091   }
1092   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1093   ierr = PetscFree(jj);CHKERRQ(ierr);
1094 
1095   /* store nonzero values */
1096   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1097   count = 0;
1098   for (i=0; i<a->mbs; i++) {
1099     for (j=0; j<bs; j++) {
1100       for (k=a->i[i]; k<a->i[i+1]; k++) {
1101         for (l=0; l<bs; l++) {
1102           aa[count++] = a->a[bs2*k + l*bs + j];
1103         }
1104       }
1105     }
1106   }
1107   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1108   ierr = PetscFree(aa);CHKERRQ(ierr);
1109 
1110   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1111   if (file) {
1112     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1119 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1120 {
1121   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1122   PetscErrorCode    ierr;
1123   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1124   PetscViewerFormat format;
1125 
1126   PetscFunctionBegin;
1127   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1128   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1129     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1130   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1131     Mat aij;
1132     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1133     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1134     ierr = MatDestroy(aij);CHKERRQ(ierr);
1135   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1136      PetscFunctionReturn(0);
1137   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1138     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1139     for (i=0; i<a->mbs; i++) {
1140       for (j=0; j<bs; j++) {
1141         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1142         for (k=a->i[i]; k<a->i[i+1]; k++) {
1143           for (l=0; l<bs; l++) {
1144 #if defined(PETSC_USE_COMPLEX)
1145             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1147                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1150                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1153             }
1154 #else
1155             if (a->a[bs2*k + l*bs + j] != 0.0) {
1156               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1157             }
1158 #endif
1159           }
1160         }
1161         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1162       }
1163     }
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1165   } else {
1166     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1167     for (i=0; i<a->mbs; i++) {
1168       for (j=0; j<bs; j++) {
1169         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1170         for (k=a->i[i]; k<a->i[i+1]; k++) {
1171           for (l=0; l<bs; l++) {
1172 #if defined(PETSC_USE_COMPLEX)
1173             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1174               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1175                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1176             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1178                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             } else {
1180               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1181             }
1182 #else
1183             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1184 #endif
1185           }
1186         }
1187         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1188       }
1189     }
1190     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1191   }
1192   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1198 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1199 {
1200   Mat            A = (Mat) Aa;
1201   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1202   PetscErrorCode ierr;
1203   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1204   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1205   MatScalar      *aa;
1206   PetscViewer    viewer;
1207 
1208   PetscFunctionBegin;
1209 
1210   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1211   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1212 
1213   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1214 
1215   /* loop over matrix elements drawing boxes */
1216   color = PETSC_DRAW_BLUE;
1217   for (i=0,row=0; i<mbs; i++,row+=bs) {
1218     for (j=a->i[i]; j<a->i[i+1]; j++) {
1219       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1220       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1221       aa = a->a + j*bs2;
1222       for (k=0; k<bs; k++) {
1223         for (l=0; l<bs; l++) {
1224           if (PetscRealPart(*aa++) >=  0.) continue;
1225           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1226         }
1227       }
1228     }
1229   }
1230   color = PETSC_DRAW_CYAN;
1231   for (i=0,row=0; i<mbs; i++,row+=bs) {
1232     for (j=a->i[i]; j<a->i[i+1]; j++) {
1233       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1234       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1235       aa = a->a + j*bs2;
1236       for (k=0; k<bs; k++) {
1237         for (l=0; l<bs; l++) {
1238           if (PetscRealPart(*aa++) != 0.) continue;
1239           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1240         }
1241       }
1242     }
1243   }
1244 
1245   color = PETSC_DRAW_RED;
1246   for (i=0,row=0; i<mbs; i++,row+=bs) {
1247     for (j=a->i[i]; j<a->i[i+1]; j++) {
1248       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1249       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1250       aa = a->a + j*bs2;
1251       for (k=0; k<bs; k++) {
1252         for (l=0; l<bs; l++) {
1253           if (PetscRealPart(*aa++) <= 0.) continue;
1254           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1255         }
1256       }
1257     }
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1264 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1265 {
1266   PetscErrorCode ierr;
1267   PetscReal      xl,yl,xr,yr,w,h;
1268   PetscDraw      draw;
1269   PetscTruth     isnull;
1270 
1271   PetscFunctionBegin;
1272 
1273   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1274   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1275 
1276   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1277   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1278   xr += w;    yr += h;  xl = -w;     yl = -h;
1279   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1280   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1281   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatView_SeqBAIJ"
1287 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1288 {
1289   PetscErrorCode ierr;
1290   PetscTruth     iascii,isbinary,isdraw;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1295   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1296   if (iascii){
1297     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1298   } else if (isbinary) {
1299     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1300   } else if (isdraw) {
1301     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1302   } else {
1303     Mat B;
1304     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1305     ierr = MatView(B,viewer);CHKERRQ(ierr);
1306     ierr = MatDestroy(B);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1314 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1315 {
1316   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1317   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1318   PetscInt    *ai = a->i,*ailen = a->ilen;
1319   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1320   MatScalar   *ap,*aa = a->a,zero = 0.0;
1321 
1322   PetscFunctionBegin;
1323   for (k=0; k<m; k++) { /* loop over rows */
1324     row  = im[k]; brow = row/bs;
1325     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1326     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1327     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1328     nrow = ailen[brow];
1329     for (l=0; l<n; l++) { /* loop over columns */
1330       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1331       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1332       col  = in[l] ;
1333       bcol = col/bs;
1334       cidx = col%bs;
1335       ridx = row%bs;
1336       high = nrow;
1337       low  = 0; /* assume unsorted */
1338       while (high-low > 5) {
1339         t = (low+high)/2;
1340         if (rp[t] > bcol) high = t;
1341         else             low  = t;
1342       }
1343       for (i=low; i<high; i++) {
1344         if (rp[i] > bcol) break;
1345         if (rp[i] == bcol) {
1346           *v++ = ap[bs2*i+bs*cidx+ridx];
1347           goto finished;
1348         }
1349       }
1350       *v++ = zero;
1351       finished:;
1352     }
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #if defined(PETSC_USE_MAT_SINGLE)
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1360 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1361 {
1362   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1363   PetscErrorCode ierr;
1364   PetscInt       i,N = m*n*b->bs2;
1365   MatScalar      *vsingle;
1366 
1367   PetscFunctionBegin;
1368   if (N > b->setvalueslen) {
1369     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1370     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1371     b->setvalueslen  = N;
1372   }
1373   vsingle = b->setvaluescopy;
1374   for (i=0; i<N; i++) {
1375     vsingle[i] = v[i];
1376   }
1377   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 #endif
1381 
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1385 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1386 {
1387   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1388   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1389   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1390   PetscErrorCode  ierr;
1391   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1392   PetscTruth      roworiented=a->roworiented;
1393   const MatScalar *value = v;
1394   MatScalar       *ap,*aa = a->a,*bap;
1395 
1396   PetscFunctionBegin;
1397   if (roworiented) {
1398     stepval = (n-1)*bs;
1399   } else {
1400     stepval = (m-1)*bs;
1401   }
1402   for (k=0; k<m; k++) { /* loop over added rows */
1403     row  = im[k];
1404     if (row < 0) continue;
1405 #if defined(PETSC_USE_DEBUG)
1406     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1407 #endif
1408     rp   = aj + ai[row];
1409     ap   = aa + bs2*ai[row];
1410     rmax = imax[row];
1411     nrow = ailen[row];
1412     low  = 0;
1413     high = nrow;
1414     for (l=0; l<n; l++) { /* loop over added columns */
1415       if (in[l] < 0) continue;
1416 #if defined(PETSC_USE_DEBUG)
1417       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1418 #endif
1419       col = in[l];
1420       if (roworiented) {
1421         value = v + k*(stepval+bs)*bs + l*bs;
1422       } else {
1423         value = v + l*(stepval+bs)*bs + k*bs;
1424       }
1425       if (col <= lastcol) low = 0; else high = nrow;
1426       lastcol = col;
1427       while (high-low > 7) {
1428         t = (low+high)/2;
1429         if (rp[t] > col) high = t;
1430         else             low  = t;
1431       }
1432       for (i=low; i<high; i++) {
1433         if (rp[i] > col) break;
1434         if (rp[i] == col) {
1435           bap  = ap +  bs2*i;
1436           if (roworiented) {
1437             if (is == ADD_VALUES) {
1438               for (ii=0; ii<bs; ii++,value+=stepval) {
1439                 for (jj=ii; jj<bs2; jj+=bs) {
1440                   bap[jj] += *value++;
1441                 }
1442               }
1443             } else {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=ii; jj<bs2; jj+=bs) {
1446                   bap[jj] = *value++;
1447                 }
1448               }
1449             }
1450           } else {
1451             if (is == ADD_VALUES) {
1452               for (ii=0; ii<bs; ii++,value+=stepval) {
1453                 for (jj=0; jj<bs; jj++) {
1454                   *bap++ += *value++;
1455                 }
1456               }
1457             } else {
1458               for (ii=0; ii<bs; ii++,value+=stepval) {
1459                 for (jj=0; jj<bs; jj++) {
1460                   *bap++  = *value++;
1461                 }
1462               }
1463             }
1464           }
1465           goto noinsert2;
1466         }
1467       }
1468       if (nonew == 1) goto noinsert2;
1469       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1470       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1471       N = nrow++ - 1; high++;
1472       /* shift up all the later entries in this row */
1473       for (ii=N; ii>=i; ii--) {
1474         rp[ii+1] = rp[ii];
1475         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1476       }
1477       if (N >= i) {
1478         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479       }
1480       rp[i] = col;
1481       bap   = ap +  bs2*i;
1482       if (roworiented) {
1483         for (ii=0; ii<bs; ii++,value+=stepval) {
1484           for (jj=ii; jj<bs2; jj+=bs) {
1485             bap[jj] = *value++;
1486           }
1487         }
1488       } else {
1489         for (ii=0; ii<bs; ii++,value+=stepval) {
1490           for (jj=0; jj<bs; jj++) {
1491             *bap++  = *value++;
1492           }
1493         }
1494       }
1495       noinsert2:;
1496       low = i;
1497     }
1498     ailen[row] = nrow;
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506 {
1507   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1508   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1510   PetscErrorCode ierr;
1511   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1512   MatScalar      *aa = a->a,*ap;
1513   PetscReal      ratio=0.6;
1514 
1515   PetscFunctionBegin;
1516   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1517 
1518   if (m) rmax = ailen[0];
1519   for (i=1; i<mbs; i++) {
1520     /* move each row back by the amount of empty slots (fshift) before it*/
1521     fshift += imax[i-1] - ailen[i-1];
1522     rmax   = PetscMax(rmax,ailen[i]);
1523     if (fshift) {
1524       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525       N = ailen[i];
1526       for (j=0; j<N; j++) {
1527         ip[j-fshift] = ip[j];
1528         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1529       }
1530     }
1531     ai[i] = ai[i-1] + ailen[i-1];
1532   }
1533   if (mbs) {
1534     fshift += imax[mbs-1] - ailen[mbs-1];
1535     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536   }
1537   /* reset ilen and imax for each row */
1538   for (i=0; i<mbs; i++) {
1539     ailen[i] = imax[i] = ai[i+1] - ai[i];
1540   }
1541   a->nz = ai[mbs];
1542 
1543   /* diagonals may have moved, so kill the diagonal pointers */
1544   a->idiagvalid = PETSC_FALSE;
1545   if (fshift && a->diag) {
1546     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1547     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1548     a->diag = 0;
1549   }
1550   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1551   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1552   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1553   a->reallocs          = 0;
1554   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1555 
1556   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1557   if (a->compressedrow.use){
1558     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1559   }
1560 
1561   A->same_nonzero = PETSC_TRUE;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 /*
1566    This function returns an array of flags which indicate the locations of contiguous
1567    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1568    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569    Assume: sizes should be long enough to hold all the values.
1570 */
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574 {
1575   PetscInt   i,j,k,row;
1576   PetscTruth flg;
1577 
1578   PetscFunctionBegin;
1579   for (i=0,j=0; i<n; j++) {
1580     row = idx[i];
1581     if (row%bs!=0) { /* Not the begining of a block */
1582       sizes[j] = 1;
1583       i++;
1584     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1586       i++;
1587     } else { /* Begining of the block, so check if the complete block exists */
1588       flg = PETSC_TRUE;
1589       for (k=1; k<bs; k++) {
1590         if (row+k != idx[i+k]) { /* break in the block */
1591           flg = PETSC_FALSE;
1592           break;
1593         }
1594       }
1595       if (flg) { /* No break in the bs */
1596         sizes[j] = bs;
1597         i+= bs;
1598       } else {
1599         sizes[j] = 1;
1600         i++;
1601       }
1602     }
1603   }
1604   *bs_max = j;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1611 {
1612   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614   PetscInt       i,j,k,count,*rows;
1615   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1616   PetscScalar    zero = 0.0;
1617   MatScalar      *aa;
1618 
1619   PetscFunctionBegin;
1620   /* Make a copy of the IS and  sort it */
1621   /* allocate memory for rows,sizes */
1622   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1623   sizes = rows + is_n;
1624 
1625   /* copy IS values to rows, and sort them */
1626   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1627   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1628   if (baij->keepzeroedrows) {
1629     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1630     bs_max = is_n;
1631     A->same_nonzero = PETSC_TRUE;
1632   } else {
1633     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1634     A->same_nonzero = PETSC_FALSE;
1635   }
1636 
1637   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638     row   = rows[j];
1639     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1640     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642     if (sizes[i] == bs && !baij->keepzeroedrows) {
1643       if (diag != 0.0) {
1644         if (baij->ilen[row/bs] > 0) {
1645           baij->ilen[row/bs]       = 1;
1646           baij->j[baij->i[row/bs]] = row/bs;
1647           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1648         }
1649         /* Now insert all the diagonal values for this bs */
1650         for (k=0; k<bs; k++) {
1651           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1652         }
1653       } else { /* (diag == 0.0) */
1654         baij->ilen[row/bs] = 0;
1655       } /* end (diag == 0.0) */
1656     } else { /* (sizes[i] != bs) */
1657 #if defined (PETSC_USE_DEBUG)
1658       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659 #endif
1660       for (k=0; k<count; k++) {
1661         aa[0] =  zero;
1662         aa    += bs;
1663       }
1664       if (diag != 0.0) {
1665         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1666       }
1667     }
1668   }
1669 
1670   ierr = PetscFree(rows);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678 {
1679   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1680   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1681   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1683   PetscErrorCode ierr;
1684   PetscInt       ridx,cidx,bs2=a->bs2;
1685   PetscTruth     roworiented=a->roworiented;
1686   MatScalar      *ap,value,*aa=a->a,*bap;
1687 
1688   PetscFunctionBegin;
1689   for (k=0; k<m; k++) { /* loop over added rows */
1690     row  = im[k];
1691     brow = row/bs;
1692     if (row < 0) continue;
1693 #if defined(PETSC_USE_DEBUG)
1694     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1695 #endif
1696     rp   = aj + ai[brow];
1697     ap   = aa + bs2*ai[brow];
1698     rmax = imax[brow];
1699     nrow = ailen[brow];
1700     low  = 0;
1701     high = nrow;
1702     for (l=0; l<n; l++) { /* loop over added columns */
1703       if (in[l] < 0) continue;
1704 #if defined(PETSC_USE_DEBUG)
1705       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1706 #endif
1707       col = in[l]; bcol = col/bs;
1708       ridx = row % bs; cidx = col % bs;
1709       if (roworiented) {
1710         value = v[l + k*n];
1711       } else {
1712         value = v[k + l*m];
1713       }
1714       if (col <= lastcol) low = 0; else high = nrow;
1715       lastcol = col;
1716       while (high-low > 7) {
1717         t = (low+high)/2;
1718         if (rp[t] > bcol) high = t;
1719         else              low  = t;
1720       }
1721       for (i=low; i<high; i++) {
1722         if (rp[i] > bcol) break;
1723         if (rp[i] == bcol) {
1724           bap  = ap +  bs2*i + bs*cidx + ridx;
1725           if (is == ADD_VALUES) *bap += value;
1726           else                  *bap  = value;
1727           goto noinsert1;
1728         }
1729       }
1730       if (nonew == 1) goto noinsert1;
1731       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1732       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1733       N = nrow++ - 1; high++;
1734       /* shift up all the later entries in this row */
1735       for (ii=N; ii>=i; ii--) {
1736         rp[ii+1] = rp[ii];
1737         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1738       }
1739       if (N>=i) {
1740         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1741       }
1742       rp[i]                      = bcol;
1743       ap[bs2*i + bs*cidx + ridx] = value;
1744       a->nz++;
1745       noinsert1:;
1746       low = i;
1747     }
1748     ailen[brow] = nrow;
1749   }
1750   A->same_nonzero = PETSC_FALSE;
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1757 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1758 {
1759   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1760   Mat            outA;
1761   PetscErrorCode ierr;
1762   PetscTruth     row_identity,col_identity;
1763 
1764   PetscFunctionBegin;
1765   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1766   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1767   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1768   if (!row_identity || !col_identity) {
1769     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1770   }
1771 
1772   outA          = inA;
1773   inA->factor   = FACTOR_LU;
1774 
1775   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1776 
1777   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1778   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1779   a->row = row;
1780   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1781   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1782   a->col = col;
1783 
1784   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1785   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1786   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1787 
1788   /*
1789       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1790       for ILU(0) factorization with natural ordering
1791   */
1792   if (inA->rmap.bs < 8) {
1793     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1794   } else {
1795     if (!a->solve_work) {
1796       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1797       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1798     }
1799   }
1800 
1801   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1802 
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 EXTERN_C_BEGIN
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1809 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1810 {
1811   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1812   PetscInt    i,nz,nbs;
1813 
1814   PetscFunctionBegin;
1815   nz  = baij->maxnz/baij->bs2;
1816   nbs = baij->nbs;
1817   for (i=0; i<nz; i++) {
1818     baij->j[i] = indices[i];
1819   }
1820   baij->nz = nz;
1821   for (i=0; i<nbs; i++) {
1822     baij->ilen[i] = baij->imax[i];
1823   }
1824 
1825   PetscFunctionReturn(0);
1826 }
1827 EXTERN_C_END
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1831 /*@
1832     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1833        in the matrix.
1834 
1835   Input Parameters:
1836 +  mat - the SeqBAIJ matrix
1837 -  indices - the column indices
1838 
1839   Level: advanced
1840 
1841   Notes:
1842     This can be called if you have precomputed the nonzero structure of the
1843   matrix and want to provide it to the matrix object to improve the performance
1844   of the MatSetValues() operation.
1845 
1846     You MUST have set the correct numbers of nonzeros per row in the call to
1847   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1848 
1849     MUST be called before any calls to MatSetValues();
1850 
1851 @*/
1852 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1853 {
1854   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1855 
1856   PetscFunctionBegin;
1857   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1858   PetscValidPointer(indices,2);
1859   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1860   if (f) {
1861     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1862   } else {
1863     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1864   }
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1870 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1871 {
1872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1873   PetscErrorCode ierr;
1874   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1875   PetscReal      atmp;
1876   PetscScalar    *x,zero = 0.0;
1877   MatScalar      *aa;
1878   PetscInt       ncols,brow,krow,kcol;
1879 
1880   PetscFunctionBegin;
1881   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1882   bs   = A->rmap.bs;
1883   aa   = a->a;
1884   ai   = a->i;
1885   aj   = a->j;
1886   mbs  = a->mbs;
1887 
1888   ierr = VecSet(v,zero);CHKERRQ(ierr);
1889   if (idx) {
1890     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1891   }
1892   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1893   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1894   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1895   for (i=0; i<mbs; i++) {
1896     ncols = ai[1] - ai[0]; ai++;
1897     brow  = bs*i;
1898     for (j=0; j<ncols; j++){
1899       for (kcol=0; kcol<bs; kcol++){
1900         for (krow=0; krow<bs; krow++){
1901           atmp = PetscAbsScalar(*aa);aa++;
1902           row = brow + krow;    /* row index */
1903           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1904           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1905         }
1906       }
1907       aj++;
1908     }
1909   }
1910   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatCopy_SeqBAIJ"
1916 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1917 {
1918   PetscErrorCode ierr;
1919 
1920   PetscFunctionBegin;
1921   /* If the two matrices have the same copy implementation, use fast copy. */
1922   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1923     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1924     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1925 
1926     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1927       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1928     }
1929     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1930   } else {
1931     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1932   }
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 #undef __FUNCT__
1937 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1938 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1939 {
1940   PetscErrorCode ierr;
1941 
1942   PetscFunctionBegin;
1943   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1949 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1950 {
1951   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1952   PetscFunctionBegin;
1953   *array = a->a;
1954   PetscFunctionReturn(0);
1955 }
1956 
1957 #undef __FUNCT__
1958 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1959 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1960 {
1961   PetscFunctionBegin;
1962   PetscFunctionReturn(0);
1963 }
1964 
1965 #include "petscblaslapack.h"
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1968 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1969 {
1970   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1971   PetscErrorCode ierr;
1972   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1973   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1974 
1975   PetscFunctionBegin;
1976   if (str == SAME_NONZERO_PATTERN) {
1977     PetscScalar alpha = a;
1978     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1979   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1980     if (y->xtoy && y->XtoY != X) {
1981       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1982       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1983     }
1984     if (!y->xtoy) { /* get xtoy */
1985       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1986       y->XtoY = X;
1987     }
1988     bs2 = bs*bs;
1989     for (i=0; i<x->nz; i++) {
1990       j = 0;
1991       while (j < bs2){
1992         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1993         j++;
1994       }
1995     }
1996     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1997   } else {
1998     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1999   }
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 #undef __FUNCT__
2004 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2005 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2006 {
2007   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2008   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2009   PetscScalar    *aa = a->a;
2010 
2011   PetscFunctionBegin;
2012   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2018 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2019 {
2020   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2021   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2022   PetscScalar    *aa = a->a;
2023 
2024   PetscFunctionBegin;
2025   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2026   PetscFunctionReturn(0);
2027 }
2028 
2029 
2030 /* -------------------------------------------------------------------*/
2031 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2032        MatGetRow_SeqBAIJ,
2033        MatRestoreRow_SeqBAIJ,
2034        MatMult_SeqBAIJ_N,
2035 /* 4*/ MatMultAdd_SeqBAIJ_N,
2036        MatMultTranspose_SeqBAIJ,
2037        MatMultTransposeAdd_SeqBAIJ,
2038        MatSolve_SeqBAIJ_N,
2039        0,
2040        0,
2041 /*10*/ 0,
2042        MatLUFactor_SeqBAIJ,
2043        0,
2044        0,
2045        MatTranspose_SeqBAIJ,
2046 /*15*/ MatGetInfo_SeqBAIJ,
2047        MatEqual_SeqBAIJ,
2048        MatGetDiagonal_SeqBAIJ,
2049        MatDiagonalScale_SeqBAIJ,
2050        MatNorm_SeqBAIJ,
2051 /*20*/ 0,
2052        MatAssemblyEnd_SeqBAIJ,
2053        0,
2054        MatSetOption_SeqBAIJ,
2055        MatZeroEntries_SeqBAIJ,
2056 /*25*/ MatZeroRows_SeqBAIJ,
2057        MatLUFactorSymbolic_SeqBAIJ,
2058        MatLUFactorNumeric_SeqBAIJ_N,
2059        MatCholeskyFactorSymbolic_SeqBAIJ,
2060        MatCholeskyFactorNumeric_SeqBAIJ_N,
2061 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2062        MatILUFactorSymbolic_SeqBAIJ,
2063        MatICCFactorSymbolic_SeqBAIJ,
2064        MatGetArray_SeqBAIJ,
2065        MatRestoreArray_SeqBAIJ,
2066 /*35*/ MatDuplicate_SeqBAIJ,
2067        0,
2068        0,
2069        MatILUFactor_SeqBAIJ,
2070        0,
2071 /*40*/ MatAXPY_SeqBAIJ,
2072        MatGetSubMatrices_SeqBAIJ,
2073        MatIncreaseOverlap_SeqBAIJ,
2074        MatGetValues_SeqBAIJ,
2075        MatCopy_SeqBAIJ,
2076 /*45*/ 0,
2077        MatScale_SeqBAIJ,
2078        0,
2079        0,
2080        0,
2081 /*50*/ 0,
2082        MatGetRowIJ_SeqBAIJ,
2083        MatRestoreRowIJ_SeqBAIJ,
2084        0,
2085        0,
2086 /*55*/ 0,
2087        0,
2088        0,
2089        0,
2090        MatSetValuesBlocked_SeqBAIJ,
2091 /*60*/ MatGetSubMatrix_SeqBAIJ,
2092        MatDestroy_SeqBAIJ,
2093        MatView_SeqBAIJ,
2094        0,
2095        0,
2096 /*65*/ 0,
2097        0,
2098        0,
2099        0,
2100        0,
2101 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2102        MatConvert_Basic,
2103        0,
2104        0,
2105        0,
2106 /*75*/ 0,
2107        0,
2108        0,
2109        0,
2110        0,
2111 /*80*/ 0,
2112        0,
2113        0,
2114        0,
2115        MatLoad_SeqBAIJ,
2116 /*85*/ 0,
2117        0,
2118        0,
2119        0,
2120        0,
2121 /*90*/ 0,
2122        0,
2123        0,
2124        0,
2125        0,
2126 /*95*/ 0,
2127        0,
2128        0,
2129        0,
2130        0,
2131 /*100*/0,
2132        0,
2133        0,
2134        0,
2135        0,
2136 /*105*/0,
2137        MatRealPart_SeqBAIJ,
2138        MatImaginaryPart_SeqBAIJ
2139 };
2140 
2141 EXTERN_C_BEGIN
2142 #undef __FUNCT__
2143 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2144 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2145 {
2146   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2147   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2148   PetscErrorCode ierr;
2149 
2150   PetscFunctionBegin;
2151   if (aij->nonew != 1) {
2152     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2153   }
2154 
2155   /* allocate space for values if not already there */
2156   if (!aij->saved_values) {
2157     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2158   }
2159 
2160   /* copy values over */
2161   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2162   PetscFunctionReturn(0);
2163 }
2164 EXTERN_C_END
2165 
2166 EXTERN_C_BEGIN
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2169 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2170 {
2171   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2172   PetscErrorCode ierr;
2173   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2174 
2175   PetscFunctionBegin;
2176   if (aij->nonew != 1) {
2177     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2178   }
2179   if (!aij->saved_values) {
2180     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2181   }
2182 
2183   /* copy values over */
2184   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2185   PetscFunctionReturn(0);
2186 }
2187 EXTERN_C_END
2188 
2189 EXTERN_C_BEGIN
2190 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2191 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2192 EXTERN_C_END
2193 
2194 EXTERN_C_BEGIN
2195 #undef __FUNCT__
2196 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2197 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2198 {
2199   Mat_SeqBAIJ    *b;
2200   PetscErrorCode ierr;
2201   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2202   PetscTruth     flg,skipallocation = PETSC_FALSE;
2203 
2204   PetscFunctionBegin;
2205 
2206   if (nz == MAT_SKIP_ALLOCATION) {
2207     skipallocation = PETSC_TRUE;
2208     nz             = 0;
2209   }
2210 
2211   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2212     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2213   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2214 
2215   if (nnz && newbs != bs) {
2216     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2217   }
2218   bs   = newbs;
2219 
2220   B->rmap.bs = B->cmap.bs = bs;
2221   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2222   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2223 
2224   B->preallocated = PETSC_TRUE;
2225 
2226   mbs  = B->rmap.n/bs;
2227   nbs  = B->cmap.n/bs;
2228   bs2  = bs*bs;
2229 
2230   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2231     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2232   }
2233 
2234   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2235   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2236   if (nnz) {
2237     for (i=0; i<mbs; i++) {
2238       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2239       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2240     }
2241   }
2242 
2243   b       = (Mat_SeqBAIJ*)B->data;
2244   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2245     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2246   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2247 
2248   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2249   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2250   if (!flg) {
2251     switch (bs) {
2252     case 1:
2253       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2254       B->ops->mult            = MatMult_SeqBAIJ_1;
2255       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2256       break;
2257     case 2:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2259       B->ops->mult            = MatMult_SeqBAIJ_2;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2261       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2262       break;
2263     case 3:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2265       B->ops->mult            = MatMult_SeqBAIJ_3;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2268       break;
2269     case 4:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2271       B->ops->mult            = MatMult_SeqBAIJ_4;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2274       break;
2275     case 5:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2277       B->ops->mult            = MatMult_SeqBAIJ_5;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2279       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2280       break;
2281     case 6:
2282       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2283       B->ops->mult            = MatMult_SeqBAIJ_6;
2284       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2285       break;
2286     case 7:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2288       B->ops->mult            = MatMult_SeqBAIJ_7;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2290       break;
2291     default:
2292       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2293       B->ops->mult            = MatMult_SeqBAIJ_N;
2294       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2295       break;
2296     }
2297   }
2298   B->rmap.bs      = bs;
2299   b->mbs     = mbs;
2300   b->nbs     = nbs;
2301   if (!skipallocation) {
2302     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2303     /* b->ilen will count nonzeros in each block row so far. */
2304     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2305     if (!nnz) {
2306       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2307       else if (nz <= 0)        nz = 1;
2308       for (i=0; i<mbs; i++) b->imax[i] = nz;
2309       nz = nz*mbs;
2310     } else {
2311       nz = 0;
2312       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2313     }
2314 
2315     /* allocate the matrix space */
2316     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2317     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2318     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2319     b->singlemalloc = PETSC_TRUE;
2320 
2321     b->i[0] = 0;
2322     for (i=1; i<mbs+1; i++) {
2323       b->i[i] = b->i[i-1] + b->imax[i-1];
2324     }
2325     b->free_a     = PETSC_TRUE;
2326     b->free_ij    = PETSC_TRUE;
2327   } else {
2328     b->free_a     = PETSC_FALSE;
2329     b->free_ij    = PETSC_FALSE;
2330   }
2331 
2332   B->rmap.bs          = bs;
2333   b->bs2              = bs2;
2334   b->mbs              = mbs;
2335   b->nz               = 0;
2336   b->maxnz            = nz*bs2;
2337   B->info.nz_unneeded = (PetscReal)b->maxnz;
2338   PetscFunctionReturn(0);
2339 }
2340 EXTERN_C_END
2341 
2342 /*MC
2343    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2344    block sparse compressed row format.
2345 
2346    Options Database Keys:
2347 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2348 
2349   Level: beginner
2350 
2351 .seealso: MatCreateSeqBAIJ()
2352 M*/
2353 
2354 EXTERN_C_BEGIN
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatCreate_SeqBAIJ"
2357 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2358 {
2359   PetscErrorCode ierr;
2360   PetscMPIInt    size;
2361   Mat_SeqBAIJ    *b;
2362 
2363   PetscFunctionBegin;
2364   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2365   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2366 
2367   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2368   B->data = (void*)b;
2369   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2370   B->factor           = 0;
2371   B->mapping          = 0;
2372   b->row              = 0;
2373   b->col              = 0;
2374   b->icol             = 0;
2375   b->reallocs         = 0;
2376   b->saved_values     = 0;
2377 #if defined(PETSC_USE_MAT_SINGLE)
2378   b->setvalueslen     = 0;
2379   b->setvaluescopy    = PETSC_NULL;
2380 #endif
2381 
2382   b->sorted           = PETSC_FALSE;
2383   b->roworiented      = PETSC_TRUE;
2384   b->nonew            = 0;
2385   b->diag             = 0;
2386   b->solve_work       = 0;
2387   b->mult_work        = 0;
2388   B->spptr            = 0;
2389   B->info.nz_unneeded = (PetscReal)b->maxnz;
2390   b->keepzeroedrows   = PETSC_FALSE;
2391   b->xtoy              = 0;
2392   b->XtoY              = 0;
2393   b->compressedrow.use     = PETSC_FALSE;
2394   b->compressedrow.nrows   = 0;
2395   b->compressedrow.i       = PETSC_NULL;
2396   b->compressedrow.rindex  = PETSC_NULL;
2397   b->compressedrow.checked = PETSC_FALSE;
2398   B->same_nonzero          = PETSC_FALSE;
2399 
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2401                                      "MatInvertBlockDiagonal_SeqBAIJ",
2402                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2404                                      "MatStoreValues_SeqBAIJ",
2405                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2407                                      "MatRetrieveValues_SeqBAIJ",
2408                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2410                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2411                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2413                                      "MatConvert_SeqBAIJ_SeqAIJ",
2414                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2416                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2417                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2419                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2420                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2421   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 EXTERN_C_END
2425 
2426 #undef __FUNCT__
2427 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2428 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2429 {
2430   Mat            C;
2431   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2432   PetscErrorCode ierr;
2433   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2434 
2435   PetscFunctionBegin;
2436   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2437 
2438   *B = 0;
2439   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2440   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2441   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2442   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2443   c    = (Mat_SeqBAIJ*)C->data;
2444 
2445   C->rmap.N   = A->rmap.N;
2446   C->cmap.N   = A->cmap.N;
2447   C->rmap.bs  = A->rmap.bs;
2448   c->bs2 = a->bs2;
2449   c->mbs = a->mbs;
2450   c->nbs = a->nbs;
2451 
2452   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2453   for (i=0; i<mbs; i++) {
2454     c->imax[i] = a->imax[i];
2455     c->ilen[i] = a->ilen[i];
2456   }
2457 
2458   /* allocate the matrix space */
2459   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2460   c->singlemalloc = PETSC_TRUE;
2461   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2462   if (mbs > 0) {
2463     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2464     if (cpvalues == MAT_COPY_VALUES) {
2465       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2466     } else {
2467       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2468     }
2469   }
2470   c->sorted      = a->sorted;
2471   c->roworiented = a->roworiented;
2472   c->nonew       = a->nonew;
2473 
2474   if (a->diag) {
2475     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2476     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2477     for (i=0; i<mbs; i++) {
2478       c->diag[i] = a->diag[i];
2479     }
2480   } else c->diag        = 0;
2481   c->nz                 = a->nz;
2482   c->maxnz              = a->maxnz;
2483   c->solve_work         = 0;
2484   c->mult_work          = 0;
2485   c->free_a             = PETSC_TRUE;
2486   c->free_ij            = PETSC_TRUE;
2487   C->preallocated       = PETSC_TRUE;
2488   C->assembled          = PETSC_TRUE;
2489 
2490   c->compressedrow.use     = a->compressedrow.use;
2491   c->compressedrow.nrows   = a->compressedrow.nrows;
2492   c->compressedrow.checked = a->compressedrow.checked;
2493   if ( a->compressedrow.checked && a->compressedrow.use){
2494     i = a->compressedrow.nrows;
2495     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2496     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2497     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2498     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2499   } else {
2500     c->compressedrow.use    = PETSC_FALSE;
2501     c->compressedrow.i      = PETSC_NULL;
2502     c->compressedrow.rindex = PETSC_NULL;
2503   }
2504   C->same_nonzero = A->same_nonzero;
2505   *B = C;
2506   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2507   PetscFunctionReturn(0);
2508 }
2509 
2510 #undef __FUNCT__
2511 #define __FUNCT__ "MatLoad_SeqBAIJ"
2512 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2513 {
2514   Mat_SeqBAIJ    *a;
2515   Mat            B;
2516   PetscErrorCode ierr;
2517   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2518   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2519   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2520   PetscInt       *masked,nmask,tmp,bs2,ishift;
2521   PetscMPIInt    size;
2522   int            fd;
2523   PetscScalar    *aa;
2524   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2525 
2526   PetscFunctionBegin;
2527   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2528     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2529   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2530   bs2  = bs*bs;
2531 
2532   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2533   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2534   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2535   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2536   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2537   M = header[1]; N = header[2]; nz = header[3];
2538 
2539   if (header[3] < 0) {
2540     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2541   }
2542 
2543   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2544 
2545   /*
2546      This code adds extra rows to make sure the number of rows is
2547     divisible by the blocksize
2548   */
2549   mbs        = M/bs;
2550   extra_rows = bs - M + bs*(mbs);
2551   if (extra_rows == bs) extra_rows = 0;
2552   else                  mbs++;
2553   if (extra_rows) {
2554     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2555   }
2556 
2557   /* read in row lengths */
2558   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2559   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2560   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2561 
2562   /* read in column indices */
2563   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2564   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2565   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2566 
2567   /* loop over row lengths determining block row lengths */
2568   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2569   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2570   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2571   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2572   masked   = mask + mbs;
2573   rowcount = 0; nzcount = 0;
2574   for (i=0; i<mbs; i++) {
2575     nmask = 0;
2576     for (j=0; j<bs; j++) {
2577       kmax = rowlengths[rowcount];
2578       for (k=0; k<kmax; k++) {
2579         tmp = jj[nzcount++]/bs;
2580         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2581       }
2582       rowcount++;
2583     }
2584     browlengths[i] += nmask;
2585     /* zero out the mask elements we set */
2586     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2587   }
2588 
2589   /* create our matrix */
2590   ierr = MatCreate(comm,&B);
2591   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2592   ierr = MatSetType(B,type);CHKERRQ(ierr);
2593   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2594   a = (Mat_SeqBAIJ*)B->data;
2595 
2596   /* set matrix "i" values */
2597   a->i[0] = 0;
2598   for (i=1; i<= mbs; i++) {
2599     a->i[i]      = a->i[i-1] + browlengths[i-1];
2600     a->ilen[i-1] = browlengths[i-1];
2601   }
2602   a->nz         = 0;
2603   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2604 
2605   /* read in nonzero values */
2606   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2607   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2608   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2609 
2610   /* set "a" and "j" values into matrix */
2611   nzcount = 0; jcount = 0;
2612   for (i=0; i<mbs; i++) {
2613     nzcountb = nzcount;
2614     nmask    = 0;
2615     for (j=0; j<bs; j++) {
2616       kmax = rowlengths[i*bs+j];
2617       for (k=0; k<kmax; k++) {
2618         tmp = jj[nzcount++]/bs;
2619 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2620       }
2621     }
2622     /* sort the masked values */
2623     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2624 
2625     /* set "j" values into matrix */
2626     maskcount = 1;
2627     for (j=0; j<nmask; j++) {
2628       a->j[jcount++]  = masked[j];
2629       mask[masked[j]] = maskcount++;
2630     }
2631     /* set "a" values into matrix */
2632     ishift = bs2*a->i[i];
2633     for (j=0; j<bs; j++) {
2634       kmax = rowlengths[i*bs+j];
2635       for (k=0; k<kmax; k++) {
2636         tmp       = jj[nzcountb]/bs ;
2637         block     = mask[tmp] - 1;
2638         point     = jj[nzcountb] - bs*tmp;
2639         idx       = ishift + bs2*block + j + bs*point;
2640         a->a[idx] = (MatScalar)aa[nzcountb++];
2641       }
2642     }
2643     /* zero out the mask elements we set */
2644     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2645   }
2646   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2647 
2648   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2649   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2650   ierr = PetscFree(aa);CHKERRQ(ierr);
2651   ierr = PetscFree(jj);CHKERRQ(ierr);
2652   ierr = PetscFree(mask);CHKERRQ(ierr);
2653 
2654   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2655   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2656   ierr = MatView_Private(B);CHKERRQ(ierr);
2657 
2658   *A = B;
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatCreateSeqBAIJ"
2664 /*@C
2665    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2666    compressed row) format.  For good matrix assembly performance the
2667    user should preallocate the matrix storage by setting the parameter nz
2668    (or the array nnz).  By setting these parameters accurately, performance
2669    during matrix assembly can be increased by more than a factor of 50.
2670 
2671    Collective on MPI_Comm
2672 
2673    Input Parameters:
2674 +  comm - MPI communicator, set to PETSC_COMM_SELF
2675 .  bs - size of block
2676 .  m - number of rows
2677 .  n - number of columns
2678 .  nz - number of nonzero blocks  per block row (same for all rows)
2679 -  nnz - array containing the number of nonzero blocks in the various block rows
2680          (possibly different for each block row) or PETSC_NULL
2681 
2682    Output Parameter:
2683 .  A - the matrix
2684 
2685    Options Database Keys:
2686 .   -mat_no_unroll - uses code that does not unroll the loops in the
2687                      block calculations (much slower)
2688 .    -mat_block_size - size of the blocks to use
2689 
2690    Level: intermediate
2691 
2692    Notes:
2693    The number of rows and columns must be divisible by blocksize.
2694 
2695    If the nnz parameter is given then the nz parameter is ignored
2696 
2697    A nonzero block is any block that as 1 or more nonzeros in it
2698 
2699    The block AIJ format is fully compatible with standard Fortran 77
2700    storage.  That is, the stored row and column indices can begin at
2701    either one (as in Fortran) or zero.  See the users' manual for details.
2702 
2703    Specify the preallocated storage with either nz or nnz (not both).
2704    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2705    allocation.  For additional details, see the users manual chapter on
2706    matrices.
2707 
2708 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2709 @*/
2710 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2711 {
2712   PetscErrorCode ierr;
2713 
2714   PetscFunctionBegin;
2715   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2716   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2717   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2718   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2719   PetscFunctionReturn(0);
2720 }
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2724 /*@C
2725    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2726    per row in the matrix. For good matrix assembly performance the
2727    user should preallocate the matrix storage by setting the parameter nz
2728    (or the array nnz).  By setting these parameters accurately, performance
2729    during matrix assembly can be increased by more than a factor of 50.
2730 
2731    Collective on MPI_Comm
2732 
2733    Input Parameters:
2734 +  A - the matrix
2735 .  bs - size of block
2736 .  nz - number of block nonzeros per block row (same for all rows)
2737 -  nnz - array containing the number of block nonzeros in the various block rows
2738          (possibly different for each block row) or PETSC_NULL
2739 
2740    Options Database Keys:
2741 .   -mat_no_unroll - uses code that does not unroll the loops in the
2742                      block calculations (much slower)
2743 .    -mat_block_size - size of the blocks to use
2744 
2745    Level: intermediate
2746 
2747    Notes:
2748    If the nnz parameter is given then the nz parameter is ignored
2749 
2750    The block AIJ format is fully compatible with standard Fortran 77
2751    storage.  That is, the stored row and column indices can begin at
2752    either one (as in Fortran) or zero.  See the users' manual for details.
2753 
2754    Specify the preallocated storage with either nz or nnz (not both).
2755    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2756    allocation.  For additional details, see the users manual chapter on
2757    matrices.
2758 
2759 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2760 @*/
2761 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2762 {
2763   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2764 
2765   PetscFunctionBegin;
2766   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2767   if (f) {
2768     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2769   }
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2775 /*@
2776      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2777               (upper triangular entries in CSR format) provided by the user.
2778 
2779      Collective on MPI_Comm
2780 
2781    Input Parameters:
2782 +  comm - must be an MPI communicator of size 1
2783 .  bs - size of block
2784 .  m - number of rows
2785 .  n - number of columns
2786 .  i - row indices
2787 .  j - column indices
2788 -  a - matrix values
2789 
2790    Output Parameter:
2791 .  mat - the matrix
2792 
2793    Level: intermediate
2794 
2795    Notes:
2796        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2797     once the matrix is destroyed
2798 
2799        You cannot set new nonzero locations into this matrix, that will generate an error.
2800 
2801        The i and j indices are 0 based
2802 
2803 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2804 
2805 @*/
2806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2807 {
2808   PetscErrorCode ierr;
2809   PetscInt       ii;
2810   Mat_SeqBAIJ    *baij;
2811 
2812   PetscFunctionBegin;
2813   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2814   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2815 
2816   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2817   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2818   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2819   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2820   baij = (Mat_SeqBAIJ*)(*mat)->data;
2821   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2822 
2823   baij->i = i;
2824   baij->j = j;
2825   baij->a = a;
2826   baij->singlemalloc = PETSC_FALSE;
2827   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2828   baij->free_a       = PETSC_FALSE;
2829   baij->free_ij       = PETSC_FALSE;
2830 
2831   for (ii=0; ii<m; ii++) {
2832     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2833 #if defined(PETSC_USE_DEBUG)
2834     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2835 #endif
2836   }
2837 #if defined(PETSC_USE_DEBUG)
2838   for (ii=0; ii<baij->i[m]; ii++) {
2839     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2840     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2841   }
2842 #endif
2843 
2844   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2845   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848