xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 35e7444da1e6edf29ca585d75dc3fe3d2c63a6e4)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "../src/mat/blockinvert.h"
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr,(*f)(Mat);
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
28   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30 
31   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
32   if (f) {
33     ierr = (*f)(mat);CHKERRQ(ierr);
34   } else {
35     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
36   }
37   PetscFunctionReturn(0);
38 }
39 
40 EXTERN_C_BEGIN
41 #undef __FUNCT__
42 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
43 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
44 {
45   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
46   PetscErrorCode ierr;
47   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
48   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
49   PetscReal      shift = 0.0;
50 
51   PetscFunctionBegin;
52   if (a->idiagvalid) PetscFunctionReturn(0);
53   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
54   diag_offset = a->diag;
55   if (!a->idiag) {
56     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
57     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
58   }
59   diag  = a->idiag;
60   mdiag = a->idiag+bs*bs*mbs;
61   /* factor and invert each block */
62   switch (bs){
63     case 1:
64       for (i=0; i<mbs; i++) {
65         odiag = v + 1*diag_offset[i];
66         diag[0]  = odiag[0];
67         mdiag[0] = odiag[0];
68         diag[0]  = 1.0 / (diag[0] + shift);
69         diag    += 1;
70         mdiag   += 1;
71       }
72       break;
73     case 2:
74       for (i=0; i<mbs; i++) {
75         odiag   = v + 4*diag_offset[i];
76         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
78 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
79 	diag    += 4;
80 	mdiag   += 4;
81       }
82       break;
83     case 3:
84       for (i=0; i<mbs; i++) {
85         odiag    = v + 9*diag_offset[i];
86         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
87         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
88         diag[8]  = odiag[8];
89         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
90         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
91         mdiag[8] = odiag[8];
92 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
93 	diag    += 9;
94 	mdiag   += 9;
95       }
96       break;
97     case 4:
98       for (i=0; i<mbs; i++) {
99         odiag  = v + 16*diag_offset[i];
100         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
101         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
103 	diag  += 16;
104 	mdiag += 16;
105       }
106       break;
107     case 5:
108       for (i=0; i<mbs; i++) {
109         odiag = v + 25*diag_offset[i];
110         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
111         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
113 	diag  += 25;
114 	mdiag += 25;
115       }
116       break;
117     case 6:
118       for (i=0; i<mbs; i++) {
119         odiag = v + 36*diag_offset[i];
120         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
121         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
123 	diag  += 36;
124 	mdiag += 36;
125       }
126       break;
127     default:
128       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
129   }
130   a->idiagvalid = PETSC_TRUE;
131   PetscFunctionReturn(0);
132 }
133 EXTERN_C_END
134 
135 #undef __FUNCT__
136 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
137 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
138 {
139   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
140   PetscScalar        *x,x1,s1;
141   const PetscScalar  *b;
142   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
143   PetscErrorCode     ierr;
144   PetscInt           m = a->mbs,i,i2,nz,j;
145   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
149   its = its*lits;
150   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
155 
156   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
157 
158   diag  = a->diag;
159   idiag = a->idiag;
160   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
161   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
162 
163   if (flag & SOR_ZERO_INITIAL_GUESS) {
164     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
165       x[0] = b[0]*idiag[0];
166       i2     = 1;
167       idiag += 1;
168       for (i=1; i<m; i++) {
169         v     = aa + ai[i];
170         vi    = aj + ai[i];
171         nz    = diag[i] - ai[i];
172         s1    = b[i2];
173         for (j=0; j<nz; j++) {
174           s1 -= v[j]*x[vi[j]];
175         }
176         x[i2]   = idiag[0]*s1;
177         idiag   += 1;
178         i2      += 1;
179       }
180       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
181       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
182     }
183     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
184         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
185       i2    = 0;
186       mdiag = a->idiag+a->mbs;
187       for (i=0; i<m; i++) {
188         x1      = x[i2];
189         x[i2]   = mdiag[0]*x1;
190         mdiag  += 1;
191         i2     += 1;
192       }
193       ierr = PetscLogFlops(m);CHKERRQ(ierr);
194     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
195       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
196     }
197     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
198       idiag   = a->idiag+a->mbs - 1;
199       i2      = m - 1;
200       x1      = x[i2];
201       x[i2]   = idiag[0]*x1;
202       idiag -= 1;
203       i2    -= 1;
204       for (i=m-2; i>=0; i--) {
205         v     = aa + (diag[i]+1);
206         vi    = aj + diag[i] + 1;
207         nz    = ai[i+1] - diag[i] - 1;
208         s1    = x[i2];
209         for (j=0; j<nz; j++) {
210           s1 -= v[j]*x[vi[j]];
211         }
212         x[i2]   = idiag[0]*s1;
213         idiag   -= 1;
214         i2      -= 1;
215       }
216       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
217     }
218   } else {
219     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
220   }
221   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
222   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
223   PetscFunctionReturn(0);
224 }
225 
226 #undef __FUNCT__
227 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
228 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
229 {
230   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
231   PetscScalar        *x,x1,x2,s1,s2;
232   const PetscScalar  *b;
233   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
234   PetscErrorCode     ierr;
235   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
236   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
237 
238   PetscFunctionBegin;
239   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
240   its = its*lits;
241   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
242   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
243   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
244   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
245   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
246 
247   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
248 
249   diag  = a->diag;
250   idiag = a->idiag;
251   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
252   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
253 
254   if (flag & SOR_ZERO_INITIAL_GUESS) {
255     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
256       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
257       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
258       i2     = 2;
259       idiag += 4;
260       for (i=1; i<m; i++) {
261 	v     = aa + 4*ai[i];
262 	vi    = aj + ai[i];
263 	nz    = diag[i] - ai[i];
264 	s1    = b[i2]; s2 = b[i2+1];
265         for (j=0; j<nz; j++) {
266 	  idx  = 2*vi[j];
267           it   = 4*j;
268 	  x1   = x[idx]; x2 = x[1+idx];
269 	  s1  -= v[it]*x1 + v[it+2]*x2;
270 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
271 	}
272 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
273 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
274         idiag   += 4;
275         i2      += 2;
276       }
277       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
278       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
279     }
280     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
281         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
282       i2    = 0;
283       mdiag = a->idiag+4*a->mbs;
284       for (i=0; i<m; i++) {
285         x1      = x[i2]; x2 = x[i2+1];
286         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
287         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
288         mdiag  += 4;
289         i2     += 2;
290       }
291       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
292     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
293       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
294     }
295     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
296       idiag   = a->idiag+4*a->mbs - 4;
297       i2      = 2*m - 2;
298       x1      = x[i2]; x2 = x[i2+1];
299       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
300       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
301       idiag -= 4;
302       i2    -= 2;
303       for (i=m-2; i>=0; i--) {
304 	v     = aa + 4*(diag[i]+1);
305 	vi    = aj + diag[i] + 1;
306 	nz    = ai[i+1] - diag[i] - 1;
307 	s1    = x[i2]; s2 = x[i2+1];
308         for (j=0; j<nz; j++) {
309  	  idx  = 2*vi[j];
310           it   = 4*j;
311 	  x1   = x[idx]; x2 = x[1+idx];
312 	  s1  -= v[it]*x1 + v[it+2]*x2;
313 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
314 	}
315 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
316 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
317         idiag   -= 4;
318         i2      -= 2;
319       }
320       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
321     }
322   } else {
323     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
324   }
325   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
326   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
327   PetscFunctionReturn(0);
328 }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
332 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
333 {
334   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
335   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
336   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
337   const PetscScalar  *b;
338   PetscErrorCode     ierr;
339   PetscInt           m = a->mbs,i,i2,nz,idx;
340   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
341 
342   PetscFunctionBegin;
343   its = its*lits;
344   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
345   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
346   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
347   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
348   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
349   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
350 
351   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
352 
353   diag  = a->diag;
354   idiag = a->idiag;
355   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
356   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
357 
358   if (flag & SOR_ZERO_INITIAL_GUESS) {
359     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
360       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
361       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
362       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
363       i2     = 3;
364       idiag += 9;
365       for (i=1; i<m; i++) {
366         v     = aa + 9*ai[i];
367         vi    = aj + ai[i];
368         nz    = diag[i] - ai[i];
369         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
370         while (nz--) {
371           idx  = 3*(*vi++);
372           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
373           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
374           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
375           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
376           v   += 9;
377         }
378         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
379         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
380         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
381         idiag   += 9;
382         i2      += 3;
383       }
384       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
385       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
386     }
387     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
388         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
389       i2    = 0;
390       mdiag = a->idiag+9*a->mbs;
391       for (i=0; i<m; i++) {
392         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
393         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
394         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
395         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
396         mdiag  += 9;
397         i2     += 3;
398       }
399       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+9*a->mbs - 9;
405       i2      = 3*m - 3;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
408       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
409       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
410       idiag -= 9;
411       i2    -= 3;
412       for (i=m-2; i>=0; i--) {
413         v     = aa + 9*(diag[i]+1);
414         vi    = aj + diag[i] + 1;
415         nz    = ai[i+1] - diag[i] - 1;
416         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
417         while (nz--) {
418           idx  = 3*(*vi++);
419           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
420           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
421           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
422           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
423           v   += 9;
424         }
425         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
426         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
427         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
428         idiag   -= 9;
429         i2      -= 3;
430       }
431       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
432     }
433   } else {
434     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
435   }
436   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
437   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
438   PetscFunctionReturn(0);
439 }
440 
441 #undef __FUNCT__
442 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
443 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
444 {
445   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
446   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
447   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
448   const PetscScalar  *b;
449   PetscErrorCode     ierr;
450   PetscInt           m = a->mbs,i,i2,nz,idx;
451   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
452 
453   PetscFunctionBegin;
454   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
475       i2     = 4;
476       idiag += 16;
477       for (i=1; i<m; i++) {
478 	v     = aa + 16*ai[i];
479 	vi    = aj + ai[i];
480 	nz    = diag[i] - ai[i];
481 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
482 	while (nz--) {
483 	  idx  = 4*(*vi++);
484 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
485 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
486 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
487 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
488 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
489 	  v   += 16;
490 	}
491 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
492 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
493 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
494 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
495         idiag   += 16;
496         i2      += 4;
497       }
498       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
499       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
500     }
501     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
502         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
503       i2    = 0;
504       mdiag = a->idiag+16*a->mbs;
505       for (i=0; i<m; i++) {
506         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
507         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
508         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
509         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
510         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
511         mdiag  += 16;
512         i2     += 4;
513       }
514       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
515     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
516       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
517     }
518     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
519       idiag   = a->idiag+16*a->mbs - 16;
520       i2      = 4*m - 4;
521       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
522       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
523       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
524       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
525       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
526       idiag -= 16;
527       i2    -= 4;
528       for (i=m-2; i>=0; i--) {
529 	v     = aa + 16*(diag[i]+1);
530 	vi    = aj + diag[i] + 1;
531 	nz    = ai[i+1] - diag[i] - 1;
532 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
533 	while (nz--) {
534 	  idx  = 4*(*vi++);
535 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
536 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
537 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
538 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
539 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
540 	  v   += 16;
541 	}
542 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
543 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
544 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
545 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
546         idiag   -= 16;
547         i2      -= 4;
548       }
549       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
550     }
551   } else {
552     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
553   }
554   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
555   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
556   PetscFunctionReturn(0);
557 }
558 
559 #undef __FUNCT__
560 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
561 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
562 {
563   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
564   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
565   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
566   const PetscScalar  *b;
567   PetscErrorCode     ierr;
568   PetscInt           m = a->mbs,i,i2,nz,idx;
569   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
570 
571   PetscFunctionBegin;
572   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
573   its = its*lits;
574   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
575   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
576   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
577   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
578   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
579 
580   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
581 
582   diag  = a->diag;
583   idiag = a->idiag;
584   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
585   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
586 
587   if (flag & SOR_ZERO_INITIAL_GUESS) {
588     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
589       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
590       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
591       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
592       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
593       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
594       i2     = 5;
595       idiag += 25;
596       for (i=1; i<m; i++) {
597 	v     = aa + 25*ai[i];
598 	vi    = aj + ai[i];
599 	nz    = diag[i] - ai[i];
600 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
601 	while (nz--) {
602 	  idx  = 5*(*vi++);
603 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
604 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
605 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
606 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
607 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
608 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
609 	  v   += 25;
610 	}
611 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
612 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
613 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
614 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
615 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
616         idiag   += 25;
617         i2      += 5;
618       }
619       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
620       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
621     }
622     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
623         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
624       i2    = 0;
625       mdiag = a->idiag+25*a->mbs;
626       for (i=0; i<m; i++) {
627         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
628         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
629         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
630         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
631         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
632         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
633         mdiag  += 25;
634         i2     += 5;
635       }
636       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
637     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
638       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
639     }
640     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
641       idiag   = a->idiag+25*a->mbs - 25;
642       i2      = 5*m - 5;
643       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
644       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
645       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
646       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
647       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
648       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
649       idiag -= 25;
650       i2    -= 5;
651       for (i=m-2; i>=0; i--) {
652 	v     = aa + 25*(diag[i]+1);
653 	vi    = aj + diag[i] + 1;
654 	nz    = ai[i+1] - diag[i] - 1;
655 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
656 	while (nz--) {
657 	  idx  = 5*(*vi++);
658 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
659 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
660 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
661 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
662 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
663 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
664 	  v   += 25;
665 	}
666 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
667 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
668 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
669 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
670 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
671         idiag   -= 25;
672         i2      -= 5;
673       }
674       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
675     }
676   } else {
677     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
678   }
679   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
680   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
681   PetscFunctionReturn(0);
682 }
683 
684 #undef __FUNCT__
685 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
686 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
687 {
688   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
689   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
690   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
691   const PetscScalar  *b;
692   PetscErrorCode     ierr;
693   PetscInt           m = a->mbs,i,i2,nz,idx;
694   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
695 
696   PetscFunctionBegin;
697   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
698   its = its*lits;
699   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
700   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
701   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
702   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
703   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
704 
705   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
706 
707   diag  = a->diag;
708   idiag = a->idiag;
709   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
710   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
711 
712   if (flag & SOR_ZERO_INITIAL_GUESS) {
713     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
714       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
715       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
716       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
717       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
718       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
719       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
720       i2     = 6;
721       idiag += 36;
722       for (i=1; i<m; i++) {
723         v     = aa + 36*ai[i];
724         vi    = aj + ai[i];
725         nz    = diag[i] - ai[i];
726         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
727         while (nz--) {
728           idx  = 6*(*vi++);
729           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
730           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
731           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
732           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
733           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
734           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
735           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
736           v   += 36;
737         }
738         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
739         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
740         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
741         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
742         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
743         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
744         idiag   += 36;
745         i2      += 6;
746       }
747       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
748       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
749     }
750     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
751         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
752       i2    = 0;
753       mdiag = a->idiag+36*a->mbs;
754       for (i=0; i<m; i++) {
755         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
756         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
757         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
758         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
759         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
760         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
761         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
762         mdiag  += 36;
763         i2     += 6;
764       }
765       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
766     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
767       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
768     }
769     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
770       idiag   = a->idiag+36*a->mbs - 36;
771       i2      = 6*m - 6;
772       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
773       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
774       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
775       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
776       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
777       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
778       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
779       idiag -= 36;
780       i2    -= 6;
781       for (i=m-2; i>=0; i--) {
782         v     = aa + 36*(diag[i]+1);
783         vi    = aj + diag[i] + 1;
784         nz    = ai[i+1] - diag[i] - 1;
785         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
786         while (nz--) {
787           idx  = 6*(*vi++);
788           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
789           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
790           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
791           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
792           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
793           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
794           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
795           v   += 36;
796         }
797         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
798         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
799         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
800         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
801         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
802         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
803         idiag   -= 36;
804         i2      -= 6;
805       }
806       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
807     }
808   } else {
809     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
810   }
811   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
812   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
818 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
819 {
820   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
821   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
822   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
823   const PetscScalar  *b;
824   PetscErrorCode     ierr;
825   PetscInt           m = a->mbs,i,i2,nz,idx;
826   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
827 
828   PetscFunctionBegin;
829   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
830   its = its*lits;
831   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
832   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
833   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
834   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
835   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
836 
837   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
838 
839   diag  = a->diag;
840   idiag = a->idiag;
841   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
842   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
843 
844   if (flag & SOR_ZERO_INITIAL_GUESS) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
846       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
847       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
848       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
849       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
850       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
851       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
852       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
853       i2     = 7;
854       idiag += 49;
855       for (i=1; i<m; i++) {
856         v     = aa + 49*ai[i];
857         vi    = aj + ai[i];
858         nz    = diag[i] - ai[i];
859         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
860         while (nz--) {
861           idx  = 7*(*vi++);
862           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
863           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
864           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
865           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
866           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
867           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
868           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
869           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
870           v   += 49;
871         }
872         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
873         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
874         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
875         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
876         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
877         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
878         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
879         idiag   += 49;
880         i2      += 7;
881       }
882       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
883       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
884     }
885     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
886         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
887       i2    = 0;
888       mdiag = a->idiag+49*a->mbs;
889       for (i=0; i<m; i++) {
890         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
891         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
892         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
893         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
894         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
895         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
896         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
897         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
898         mdiag  += 36;
899         i2     += 6;
900       }
901       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
902     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
903       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
904     }
905     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
906       idiag   = a->idiag+49*a->mbs - 49;
907       i2      = 7*m - 7;
908       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
909       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
910       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
911       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
912       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
913       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
914       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
915       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
916       idiag -= 49;
917       i2    -= 7;
918       for (i=m-2; i>=0; i--) {
919         v     = aa + 49*(diag[i]+1);
920         vi    = aj + diag[i] + 1;
921         nz    = ai[i+1] - diag[i] - 1;
922         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
923         while (nz--) {
924           idx  = 7*(*vi++);
925           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
926           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
927           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
928           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
929           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
930           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
931           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
932           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
933           v   += 49;
934         }
935         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
936         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
937         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
938         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
939         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
940         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
941         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
942         idiag   -= 49;
943         i2      -= 7;
944       }
945       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
946     }
947   } else {
948     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
949   }
950   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
951   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 /*
956     Special version for direct calls from Fortran (Used in PETSc-fun3d)
957 */
958 #if defined(PETSC_HAVE_FORTRAN_CAPS)
959 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
960 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
961 #define matsetvaluesblocked4_ matsetvaluesblocked4
962 #endif
963 
964 EXTERN_C_BEGIN
965 #undef __FUNCT__
966 #define __FUNCT__ "matsetvaluesblocked4_"
967 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
968 {
969   Mat               A = *AA;
970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
972   PetscInt          *ai=a->i,*ailen=a->ilen;
973   PetscInt          *aj=a->j,stepval,lastcol = -1;
974   const PetscScalar *value = v;
975   MatScalar         *ap,*aa = a->a,*bap;
976 
977   PetscFunctionBegin;
978   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
979   stepval = (n-1)*4;
980   for (k=0; k<m; k++) { /* loop over added rows */
981     row  = im[k];
982     rp   = aj + ai[row];
983     ap   = aa + 16*ai[row];
984     nrow = ailen[row];
985     low  = 0;
986     high = nrow;
987     for (l=0; l<n; l++) { /* loop over added columns */
988       col = in[l];
989       if (col <= lastcol) low = 0; else high = nrow;
990       lastcol = col;
991       value = v + k*(stepval+4 + l)*4;
992       while (high-low > 7) {
993         t = (low+high)/2;
994         if (rp[t] > col) high = t;
995         else             low  = t;
996       }
997       for (i=low; i<high; i++) {
998         if (rp[i] > col) break;
999         if (rp[i] == col) {
1000           bap  = ap +  16*i;
1001           for (ii=0; ii<4; ii++,value+=stepval) {
1002             for (jj=ii; jj<16; jj+=4) {
1003               bap[jj] += *value++;
1004             }
1005           }
1006           goto noinsert2;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column index thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N >= i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i] = col;
1020       bap   = ap +  16*i;
1021       for (ii=0; ii<4; ii++,value+=stepval) {
1022         for (jj=ii; jj<16; jj+=4) {
1023           bap[jj] = *value++;
1024         }
1025       }
1026       noinsert2:;
1027       low = i;
1028     }
1029     ailen[row] = nrow;
1030   }
1031   PetscFunctionReturnVoid();
1032 }
1033 EXTERN_C_END
1034 
1035 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1036 #define matsetvalues4_ MATSETVALUES4
1037 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1038 #define matsetvalues4_ matsetvalues4
1039 #endif
1040 
1041 EXTERN_C_BEGIN
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatSetValues4_"
1044 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1045 {
1046   Mat         A = *AA;
1047   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1048   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1049   PetscInt    *ai=a->i,*ailen=a->ilen;
1050   PetscInt    *aj=a->j,brow,bcol;
1051   PetscInt    ridx,cidx,lastcol = -1;
1052   MatScalar   *ap,value,*aa=a->a,*bap;
1053 
1054   PetscFunctionBegin;
1055   for (k=0; k<m; k++) { /* loop over added rows */
1056     row  = im[k]; brow = row/4;
1057     rp   = aj + ai[brow];
1058     ap   = aa + 16*ai[brow];
1059     nrow = ailen[brow];
1060     low  = 0;
1061     high = nrow;
1062     for (l=0; l<n; l++) { /* loop over added columns */
1063       col = in[l]; bcol = col/4;
1064       ridx = row % 4; cidx = col % 4;
1065       value = v[l + k*n];
1066       if (col <= lastcol) low = 0; else high = nrow;
1067       lastcol = col;
1068       while (high-low > 7) {
1069         t = (low+high)/2;
1070         if (rp[t] > bcol) high = t;
1071         else              low  = t;
1072       }
1073       for (i=low; i<high; i++) {
1074         if (rp[i] > bcol) break;
1075         if (rp[i] == bcol) {
1076           bap  = ap +  16*i + 4*cidx + ridx;
1077           *bap += value;
1078           goto noinsert1;
1079         }
1080       }
1081       N = nrow++ - 1;
1082       high++; /* added new column thus must search to one higher than before */
1083       /* shift up all the later entries in this row */
1084       for (ii=N; ii>=i; ii--) {
1085         rp[ii+1] = rp[ii];
1086         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1087       }
1088       if (N>=i) {
1089         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1090       }
1091       rp[i]                    = bcol;
1092       ap[16*i + 4*cidx + ridx] = value;
1093       noinsert1:;
1094       low = i;
1095     }
1096     ailen[brow] = nrow;
1097   }
1098   PetscFunctionReturnVoid();
1099 }
1100 EXTERN_C_END
1101 
1102 /*
1103      Checks for missing diagonals
1104 */
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1107 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1108 {
1109   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1110   PetscErrorCode ierr;
1111   PetscInt       *diag,*jj = a->j,i;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1115   *missing = PETSC_FALSE;
1116   if (A->rmap->n > 0 && !jj) {
1117     *missing  = PETSC_TRUE;
1118     if (d) *d = 0;
1119     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1120   } else {
1121     diag     = a->diag;
1122     for (i=0; i<a->mbs; i++) {
1123       if (jj[diag[i]] != i) {
1124         *missing  = PETSC_TRUE;
1125         if (d) *d = i;
1126         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1127       }
1128     }
1129   }
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1135 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1136 {
1137   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139   PetscInt       i,j,m = a->mbs;
1140 
1141   PetscFunctionBegin;
1142   if (!a->diag) {
1143     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1144     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1145     a->free_diag = PETSC_TRUE;
1146   }
1147   for (i=0; i<m; i++) {
1148     a->diag[i] = a->i[i+1];
1149     for (j=a->i[i]; j<a->i[i+1]; j++) {
1150       if (a->j[j] == i) {
1151         a->diag[i] = j;
1152         break;
1153       }
1154     }
1155   }
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 
1160 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1161 
1162 #undef __FUNCT__
1163 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1164 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1165 {
1166   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1167   PetscErrorCode ierr;
1168   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1169   PetscInt       *tia, *tja;
1170 
1171   PetscFunctionBegin;
1172   *nn = n;
1173   if (!ia) PetscFunctionReturn(0);
1174   if (symmetric) {
1175     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1176     nz   = tia[n];
1177   } else {
1178     tia = a->i; tja = a->j;
1179   }
1180 
1181   if (!blockcompressed && bs > 1) {
1182     (*nn) *= bs;
1183     nbs    = bs;
1184     /* malloc & create the natural set of indices */
1185     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1186     if (n) {
1187       (*ia)[0] = 0;
1188       for (j=1; j<bs; j++) {
1189         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1190       }
1191     }
1192 
1193     for (i=1; i<n; i++) {
1194       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1195       for (j=1; j<bs; j++) {
1196         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1197       }
1198     }
1199     if (n) {
1200       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1201     }
1202 
1203     if (ja) {
1204       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1205       cnt = 0;
1206       for (i=0; i<n; i++) {
1207         for (j=0; j<bs; j++) {
1208           for (k=tia[i]; k<tia[i+1]; k++) {
1209             for (l=0; l<bs; l++) {
1210               (*ja)[cnt++] = bs*tja[k] + l;
1211 	    }
1212           }
1213         }
1214       }
1215     }
1216 
1217     n     *= bs;
1218     nz *= bs*bs;
1219     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1220       ierr = PetscFree(tia);CHKERRQ(ierr);
1221       ierr = PetscFree(tja);CHKERRQ(ierr);
1222     }
1223   } else if (oshift == 1) {
1224     if (symmetric) {
1225       PetscInt nz = tia[A->rmap->n/bs];
1226       /*  add 1 to i and j indices */
1227       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1228       *ia = tia;
1229       if (ja) {
1230 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1231         *ja = tja;
1232       }
1233     } else {
1234       PetscInt nz = a->i[A->rmap->n/bs];
1235       /* malloc space and  add 1 to i and j indices */
1236       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1237       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1238       if (ja) {
1239 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1240 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1241       }
1242     }
1243   } else {
1244     *ia = tia;
1245     if (ja) *ja = tja;
1246   }
1247 
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1253 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1254 {
1255   PetscErrorCode ierr;
1256 
1257   PetscFunctionBegin;
1258   if (!ia) PetscFunctionReturn(0);
1259   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1260     ierr = PetscFree(*ia);CHKERRQ(ierr);
1261     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1268 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1269 {
1270   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274 #if defined(PETSC_USE_LOG)
1275   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1276 #endif
1277   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1278   if (a->row) {
1279     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1280   }
1281   if (a->col) {
1282     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1283   }
1284   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1285   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1286   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1287   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1288   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1289   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1290   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1291   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1292   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1293 
1294   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1295   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1296   ierr = PetscFree(a);CHKERRQ(ierr);
1297 
1298   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1299   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1312 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1313 {
1314   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   switch (op) {
1319   case MAT_ROW_ORIENTED:
1320     a->roworiented    = flg;
1321     break;
1322   case MAT_KEEP_NONZERO_PATTERN:
1323     a->keepnonzeropattern = flg;
1324     break;
1325   case MAT_NEW_NONZERO_LOCATIONS:
1326     a->nonew          = (flg ? 0 : 1);
1327     break;
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     a->nonew          = (flg ? -1 : 0);
1330     break;
1331   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1332     a->nonew          = (flg ? -2 : 0);
1333     break;
1334   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1335     a->nounused       = (flg ? -1 : 0);
1336     break;
1337   case MAT_NEW_DIAGONALS:
1338   case MAT_IGNORE_OFF_PROC_ENTRIES:
1339   case MAT_USE_HASH_TABLE:
1340     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1341     break;
1342   case MAT_SYMMETRIC:
1343   case MAT_STRUCTURALLY_SYMMETRIC:
1344   case MAT_HERMITIAN:
1345   case MAT_SYMMETRY_ETERNAL:
1346     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1347     break;
1348   default:
1349     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1350   }
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 #undef __FUNCT__
1355 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1356 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1357 {
1358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1359   PetscErrorCode ierr;
1360   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1361   MatScalar      *aa,*aa_i;
1362   PetscScalar    *v_i;
1363 
1364   PetscFunctionBegin;
1365   bs  = A->rmap->bs;
1366   ai  = a->i;
1367   aj  = a->j;
1368   aa  = a->a;
1369   bs2 = a->bs2;
1370 
1371   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1372 
1373   bn  = row/bs;   /* Block number */
1374   bp  = row % bs; /* Block Position */
1375   M   = ai[bn+1] - ai[bn];
1376   *nz = bs*M;
1377 
1378   if (v) {
1379     *v = 0;
1380     if (*nz) {
1381       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1382       for (i=0; i<M; i++) { /* for each block in the block row */
1383         v_i  = *v + i*bs;
1384         aa_i = aa + bs2*(ai[bn] + i);
1385         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1386       }
1387     }
1388   }
1389 
1390   if (idx) {
1391     *idx = 0;
1392     if (*nz) {
1393       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1394       for (i=0; i<M; i++) { /* for each block in the block row */
1395         idx_i = *idx + i*bs;
1396         itmp  = bs*aj[ai[bn] + i];
1397         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1398       }
1399     }
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1406 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1407 {
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1412   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1420 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1421 {
1422   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1423   Mat            C;
1424   PetscErrorCode ierr;
1425   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1426   PetscInt       *rows,*cols,bs2=a->bs2;
1427   MatScalar      *array;
1428 
1429   PetscFunctionBegin;
1430   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1431   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1432     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1433     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1434 
1435     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1436     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1437     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1438     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1439     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1440     ierr = PetscFree(col);CHKERRQ(ierr);
1441   } else {
1442     C = *B;
1443   }
1444 
1445   array = a->a;
1446   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1447   for (i=0; i<mbs; i++) {
1448     cols[0] = i*bs;
1449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1450     len = ai[i+1] - ai[i];
1451     for (j=0; j<len; j++) {
1452       rows[0] = (*aj++)*bs;
1453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1454       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1455       array += bs2;
1456     }
1457   }
1458   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1459 
1460   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462 
1463   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1464     *B = C;
1465   } else {
1466     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1478   int            fd;
1479   PetscScalar    *aa;
1480   FILE           *file;
1481 
1482   PetscFunctionBegin;
1483   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1484   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1485   col_lens[0] = MAT_FILE_COOKIE;
1486 
1487   col_lens[1] = A->rmap->N;
1488   col_lens[2] = A->cmap->n;
1489   col_lens[3] = a->nz*bs2;
1490 
1491   /* store lengths of each row and write (including header) to file */
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1496     }
1497   }
1498   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1499   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1500 
1501   /* store column indices (zero start index) */
1502   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1503   count = 0;
1504   for (i=0; i<a->mbs; i++) {
1505     for (j=0; j<bs; j++) {
1506       for (k=a->i[i]; k<a->i[i+1]; k++) {
1507         for (l=0; l<bs; l++) {
1508           jj[count++] = bs*a->j[k] + l;
1509         }
1510       }
1511     }
1512   }
1513   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1514   ierr = PetscFree(jj);CHKERRQ(ierr);
1515 
1516   /* store nonzero values */
1517   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       for (k=a->i[i]; k<a->i[i+1]; k++) {
1522         for (l=0; l<bs; l++) {
1523           aa[count++] = a->a[bs2*k + l*bs + j];
1524         }
1525       }
1526     }
1527   }
1528   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1529   ierr = PetscFree(aa);CHKERRQ(ierr);
1530 
1531   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1532   if (file) {
1533     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1541 {
1542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1543   PetscErrorCode    ierr;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1552     Mat aij;
1553     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1555     ierr = MatDestroy(aij);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1557      PetscFunctionReturn(0);
1558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             if (a->a[bs2*k + l*bs + j] != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1578             }
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   } else {
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1588     for (i=0; i<a->mbs; i++) {
1589       for (j=0; j<bs; j++) {
1590         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1591         for (k=a->i[i]; k<a->i[i+1]; k++) {
1592           for (l=0; l<bs; l++) {
1593 #if defined(PETSC_USE_COMPLEX)
1594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1598               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             } else {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             }
1603 #else
1604             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1612   }
1613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1620 {
1621   Mat            A = (Mat) Aa;
1622   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1623   PetscErrorCode ierr;
1624   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1625   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1626   MatScalar      *aa;
1627   PetscViewer    viewer;
1628 
1629   PetscFunctionBegin;
1630 
1631   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1632   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1633 
1634   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1635 
1636   /* loop over matrix elements drawing boxes */
1637   color = PETSC_DRAW_BLUE;
1638   for (i=0,row=0; i<mbs; i++,row+=bs) {
1639     for (j=a->i[i]; j<a->i[i+1]; j++) {
1640       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642       aa = a->a + j*bs2;
1643       for (k=0; k<bs; k++) {
1644         for (l=0; l<bs; l++) {
1645           if (PetscRealPart(*aa++) >=  0.) continue;
1646           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647         }
1648       }
1649     }
1650   }
1651   color = PETSC_DRAW_CYAN;
1652   for (i=0,row=0; i<mbs; i++,row+=bs) {
1653     for (j=a->i[i]; j<a->i[i+1]; j++) {
1654       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656       aa = a->a + j*bs2;
1657       for (k=0; k<bs; k++) {
1658         for (l=0; l<bs; l++) {
1659           if (PetscRealPart(*aa++) != 0.) continue;
1660           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661         }
1662       }
1663     }
1664   }
1665 
1666   color = PETSC_DRAW_RED;
1667   for (i=0,row=0; i<mbs; i++,row+=bs) {
1668     for (j=a->i[i]; j<a->i[i+1]; j++) {
1669       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671       aa = a->a + j*bs2;
1672       for (k=0; k<bs; k++) {
1673         for (l=0; l<bs; l++) {
1674           if (PetscRealPart(*aa++) <= 0.) continue;
1675           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1676         }
1677       }
1678     }
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscReal      xl,yl,xr,yr,w,h;
1689   PetscDraw      draw;
1690   PetscTruth     isnull;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1696 
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr += w;    yr += h;  xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatView_SeqBAIJ"
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscTruth     iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii){
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1738   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1739   PetscInt    *ai = a->i,*ailen = a->ilen;
1740   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1741   MatScalar   *ap,*aa = a->a;
1742 
1743   PetscFunctionBegin;
1744   for (k=0; k<m; k++) { /* loop over rows */
1745     row  = im[k]; brow = row/bs;
1746     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1747     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1748     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1749     nrow = ailen[brow];
1750     for (l=0; l<n; l++) { /* loop over columns */
1751       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1752       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1753       col  = in[l] ;
1754       bcol = col/bs;
1755       cidx = col%bs;
1756       ridx = row%bs;
1757       high = nrow;
1758       low  = 0; /* assume unsorted */
1759       while (high-low > 5) {
1760         t = (low+high)/2;
1761         if (rp[t] > bcol) high = t;
1762         else             low  = t;
1763       }
1764       for (i=low; i<high; i++) {
1765         if (rp[i] > bcol) break;
1766         if (rp[i] == bcol) {
1767           *v++ = ap[bs2*i+bs*cidx+ridx];
1768           goto finished;
1769         }
1770       }
1771       *v++ = 0.0;
1772       finished:;
1773     }
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #define CHUNKSIZE 10
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1782 {
1783   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1784   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1785   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1786   PetscErrorCode    ierr;
1787   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1788   PetscTruth        roworiented=a->roworiented;
1789   const PetscScalar *value = v;
1790   MatScalar         *ap,*aa = a->a,*bap;
1791 
1792   PetscFunctionBegin;
1793   if (roworiented) {
1794     stepval = (n-1)*bs;
1795   } else {
1796     stepval = (m-1)*bs;
1797   }
1798   for (k=0; k<m; k++) { /* loop over added rows */
1799     row  = im[k];
1800     if (row < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1803 #endif
1804     rp   = aj + ai[row];
1805     ap   = aa + bs2*ai[row];
1806     rmax = imax[row];
1807     nrow = ailen[row];
1808     low  = 0;
1809     high = nrow;
1810     for (l=0; l<n; l++) { /* loop over added columns */
1811       if (in[l] < 0) continue;
1812 #if defined(PETSC_USE_DEBUG)
1813       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1814 #endif
1815       col = in[l];
1816       if (roworiented) {
1817         value = v + k*(stepval+bs)*bs + l*bs;
1818       } else {
1819         value = v + l*(stepval+bs)*bs + k*bs;
1820       }
1821       if (col <= lastcol) low = 0; else high = nrow;
1822       lastcol = col;
1823       while (high-low > 7) {
1824         t = (low+high)/2;
1825         if (rp[t] > col) high = t;
1826         else             low  = t;
1827       }
1828       for (i=low; i<high; i++) {
1829         if (rp[i] > col) break;
1830         if (rp[i] == col) {
1831           bap  = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++ += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++  = *value++;
1857                 }
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++  = *value++;
1888           }
1889         }
1890       }
1891       noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax   = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933   /* reset ilen and imax for each row */
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) {
1947     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   }
1949   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1952   a->reallocs          = 0;
1953   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1954 
1955   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1956   if (a->compressedrow.use){
1957     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959 
1960   A->same_nonzero = PETSC_TRUE;
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt   i,j,k,row;
1975   PetscTruth flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i+= bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2010 {
2011   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013   PetscInt       i,j,k,count,*rows;
2014   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar    zero = 0.0;
2016   MatScalar      *aa;
2017 
2018   PetscFunctionBegin;
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2022 
2023   /* copy IS values to rows, and sort them */
2024   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2025   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2026   if (baij->keepnonzeropattern) {
2027     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2028     bs_max = is_n;
2029     A->same_nonzero = PETSC_TRUE;
2030   } else {
2031     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032     A->same_nonzero = PETSC_FALSE;
2033   }
2034 
2035   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036     row   = rows[j];
2037     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041       if (diag != 0.0) {
2042         if (baij->ilen[row/bs] > 0) {
2043           baij->ilen[row/bs]       = 1;
2044           baij->j[baij->i[row/bs]] = row/bs;
2045           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2046         }
2047         /* Now insert all the diagonal values for this bs */
2048         for (k=0; k<bs; k++) {
2049           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2050         }
2051       } else { /* (diag == 0.0) */
2052         baij->ilen[row/bs] = 0;
2053       } /* end (diag == 0.0) */
2054     } else { /* (sizes[i] != bs) */
2055 #if defined (PETSC_USE_DEBUG)
2056       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2057 #endif
2058       for (k=0; k<count; k++) {
2059         aa[0] =  zero;
2060         aa    += bs;
2061       }
2062       if (diag != 0.0) {
2063         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2064       }
2065     }
2066   }
2067 
2068   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2069   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2075 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2076 {
2077   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2078   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2079   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2080   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2081   PetscErrorCode ierr;
2082   PetscInt       ridx,cidx,bs2=a->bs2;
2083   PetscTruth     roworiented=a->roworiented;
2084   MatScalar      *ap,value,*aa=a->a,*bap;
2085 
2086   PetscFunctionBegin;
2087   if (v) PetscValidScalarPointer(v,6);
2088   for (k=0; k<m; k++) { /* loop over added rows */
2089     row  = im[k];
2090     brow = row/bs;
2091     if (row < 0) continue;
2092 #if defined(PETSC_USE_DEBUG)
2093     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2094 #endif
2095     rp   = aj + ai[brow];
2096     ap   = aa + bs2*ai[brow];
2097     rmax = imax[brow];
2098     nrow = ailen[brow];
2099     low  = 0;
2100     high = nrow;
2101     for (l=0; l<n; l++) { /* loop over added columns */
2102       if (in[l] < 0) continue;
2103 #if defined(PETSC_USE_DEBUG)
2104       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2105 #endif
2106       col = in[l]; bcol = col/bs;
2107       ridx = row % bs; cidx = col % bs;
2108       if (roworiented) {
2109         value = v[l + k*n];
2110       } else {
2111         value = v[k + l*m];
2112       }
2113       if (col <= lastcol) low = 0; else high = nrow;
2114       lastcol = col;
2115       while (high-low > 7) {
2116         t = (low+high)/2;
2117         if (rp[t] > bcol) high = t;
2118         else              low  = t;
2119       }
2120       for (i=low; i<high; i++) {
2121         if (rp[i] > bcol) break;
2122         if (rp[i] == bcol) {
2123           bap  = ap +  bs2*i + bs*cidx + ridx;
2124           if (is == ADD_VALUES) *bap += value;
2125           else                  *bap  = value;
2126           goto noinsert1;
2127         }
2128       }
2129       if (nonew == 1) goto noinsert1;
2130       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2131       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2132       N = nrow++ - 1; high++;
2133       /* shift up all the later entries in this row */
2134       for (ii=N; ii>=i; ii--) {
2135         rp[ii+1] = rp[ii];
2136         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2137       }
2138       if (N>=i) {
2139         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2140       }
2141       rp[i]                      = bcol;
2142       ap[bs2*i + bs*cidx + ridx] = value;
2143       a->nz++;
2144       noinsert1:;
2145       low = i;
2146     }
2147     ailen[brow] = nrow;
2148   }
2149   A->same_nonzero = PETSC_FALSE;
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2155 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2156 {
2157   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2158   Mat            outA;
2159   PetscErrorCode ierr;
2160   PetscTruth     row_identity,col_identity;
2161 
2162   PetscFunctionBegin;
2163   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2164   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2165   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2166   if (!row_identity || !col_identity) {
2167     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2168   }
2169 
2170   outA          = inA;
2171   inA->factor   = MAT_FACTOR_LU;
2172 
2173   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2174 
2175   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2176   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2177   a->row = row;
2178   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2179   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2180   a->col = col;
2181 
2182   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2183   if (a->icol) {
2184     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2185   }
2186   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2187   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2188 
2189   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2190   if (!a->solve_work) {
2191     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2192     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2193   }
2194   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2195 
2196   PetscFunctionReturn(0);
2197 }
2198 
2199 EXTERN_C_BEGIN
2200 #undef __FUNCT__
2201 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2202 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2203 {
2204   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2205   PetscInt    i,nz,mbs;
2206 
2207   PetscFunctionBegin;
2208   nz  = baij->maxnz/baij->bs2;
2209   mbs = baij->mbs;
2210   for (i=0; i<nz; i++) {
2211     baij->j[i] = indices[i];
2212   }
2213   baij->nz = nz;
2214   for (i=0; i<mbs; i++) {
2215     baij->ilen[i] = baij->imax[i];
2216   }
2217   PetscFunctionReturn(0);
2218 }
2219 EXTERN_C_END
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2223 /*@
2224     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2225        in the matrix.
2226 
2227   Input Parameters:
2228 +  mat - the SeqBAIJ matrix
2229 -  indices - the column indices
2230 
2231   Level: advanced
2232 
2233   Notes:
2234     This can be called if you have precomputed the nonzero structure of the
2235   matrix and want to provide it to the matrix object to improve the performance
2236   of the MatSetValues() operation.
2237 
2238     You MUST have set the correct numbers of nonzeros per row in the call to
2239   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2240 
2241     MUST be called before any calls to MatSetValues();
2242 
2243 @*/
2244 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2245 {
2246   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2247 
2248   PetscFunctionBegin;
2249   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2250   PetscValidPointer(indices,2);
2251   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2252   if (f) {
2253     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2254   } else {
2255     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2256   }
2257   PetscFunctionReturn(0);
2258 }
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2262 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2263 {
2264   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2265   PetscErrorCode ierr;
2266   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2267   PetscReal      atmp;
2268   PetscScalar    *x,zero = 0.0;
2269   MatScalar      *aa;
2270   PetscInt       ncols,brow,krow,kcol;
2271 
2272   PetscFunctionBegin;
2273   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2274   bs   = A->rmap->bs;
2275   aa   = a->a;
2276   ai   = a->i;
2277   aj   = a->j;
2278   mbs  = a->mbs;
2279 
2280   ierr = VecSet(v,zero);CHKERRQ(ierr);
2281   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2282   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2283   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2284   for (i=0; i<mbs; i++) {
2285     ncols = ai[1] - ai[0]; ai++;
2286     brow  = bs*i;
2287     for (j=0; j<ncols; j++){
2288       for (kcol=0; kcol<bs; kcol++){
2289         for (krow=0; krow<bs; krow++){
2290           atmp = PetscAbsScalar(*aa);aa++;
2291           row = brow + krow;    /* row index */
2292           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2293           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2294         }
2295       }
2296       aj++;
2297     }
2298   }
2299   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatCopy_SeqBAIJ"
2305 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2306 {
2307   PetscErrorCode ierr;
2308 
2309   PetscFunctionBegin;
2310   /* If the two matrices have the same copy implementation, use fast copy. */
2311   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2312     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2313     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2314 
2315     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2316       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2317     }
2318     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2319   } else {
2320     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2321   }
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 #undef __FUNCT__
2326 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2327 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2328 {
2329   PetscErrorCode ierr;
2330 
2331   PetscFunctionBegin;
2332   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2338 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2339 {
2340   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2341   PetscFunctionBegin;
2342   *array = a->a;
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2348 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2349 {
2350   PetscFunctionBegin;
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #include "petscblaslapack.h"
2355 #undef __FUNCT__
2356 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2357 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2358 {
2359   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2360   PetscErrorCode ierr;
2361   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2362   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2363 
2364   PetscFunctionBegin;
2365   if (str == SAME_NONZERO_PATTERN) {
2366     PetscScalar alpha = a;
2367     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2368   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2369     if (y->xtoy && y->XtoY != X) {
2370       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2371       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2372     }
2373     if (!y->xtoy) { /* get xtoy */
2374       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2375       y->XtoY = X;
2376       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2377     }
2378     bs2 = bs*bs;
2379     for (i=0; i<x->nz; i++) {
2380       j = 0;
2381       while (j < bs2){
2382         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2383         j++;
2384       }
2385     }
2386     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2387   } else {
2388     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2389   }
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2395 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2396 {
2397   PetscInt rbs,cbs;
2398   PetscErrorCode ierr;
2399 
2400   PetscFunctionBegin;
2401   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2402   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2403   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2404   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2410 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2411 {
2412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2413   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2414   MatScalar      *aa = a->a;
2415 
2416   PetscFunctionBegin;
2417   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2423 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2426   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2427   MatScalar      *aa = a->a;
2428 
2429   PetscFunctionBegin;
2430   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2435 
2436 #undef __FUNCT__
2437 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2438 /*
2439     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2440 */
2441 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2442 {
2443   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2444   PetscErrorCode ierr;
2445   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2446   PetscInt       nz = a->i[m],row,*jj,mr,col;
2447 
2448   PetscFunctionBegin;
2449   *nn = n;
2450   if (!ia) PetscFunctionReturn(0);
2451   if (symmetric) {
2452     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2453   } else {
2454     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2455     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2456     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2457     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2458     jj = a->j;
2459     for (i=0; i<nz; i++) {
2460       collengths[jj[i]]++;
2461     }
2462     cia[0] = oshift;
2463     for (i=0; i<n; i++) {
2464       cia[i+1] = cia[i] + collengths[i];
2465     }
2466     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2467     jj   = a->j;
2468     for (row=0; row<m; row++) {
2469       mr = a->i[row+1] - a->i[row];
2470       for (i=0; i<mr; i++) {
2471         col = *jj++;
2472         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2473       }
2474     }
2475     ierr = PetscFree(collengths);CHKERRQ(ierr);
2476     *ia = cia; *ja = cja;
2477   }
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2483 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2484 {
2485   PetscErrorCode ierr;
2486 
2487   PetscFunctionBegin;
2488   if (!ia) PetscFunctionReturn(0);
2489   ierr = PetscFree(*ia);CHKERRQ(ierr);
2490   ierr = PetscFree(*ja);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2496 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2497 {
2498   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2499   PetscErrorCode ierr;
2500   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2501   PetscScalar    dx,*y,*xx,*w3_array;
2502   PetscScalar    *vscale_array;
2503   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2504   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2505   void           *fctx = coloring->fctx;
2506   PetscTruth     flg = PETSC_FALSE;
2507   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2508   Vec            x1_tmp;
2509 
2510   PetscFunctionBegin;
2511   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2512   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2513   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2514   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2515 
2516   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2517   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2518   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2519   if (flg) {
2520     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2521   } else {
2522     PetscTruth assembled;
2523     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2524     if (assembled) {
2525       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2526     }
2527   }
2528 
2529   x1_tmp = x1;
2530   if (!coloring->vscale){
2531     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2532   }
2533 
2534   /*
2535     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2536     coloring->F for the coarser grids from the finest
2537   */
2538   if (coloring->F) {
2539     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2540     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2541     if (m1 != m2) {
2542       coloring->F = 0;
2543       }
2544     }
2545 
2546   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2547     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2548   }
2549   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2550 
2551   /* Set w1 = F(x1) */
2552   if (coloring->F) {
2553     w1          = coloring->F; /* use already computed value of function */
2554     coloring->F = 0;
2555   } else {
2556     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2557     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2558     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2559   }
2560 
2561   if (!coloring->w3) {
2562     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2563     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2564   }
2565   w3 = coloring->w3;
2566 
2567     CHKMEMQ;
2568     /* Compute all the local scale factors, including ghost points */
2569   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2570   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2571   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2572   if (ctype == IS_COLORING_GHOSTED){
2573     col_start = 0; col_end = N;
2574   } else if (ctype == IS_COLORING_GLOBAL){
2575     xx = xx - start;
2576     vscale_array = vscale_array - start;
2577     col_start = start; col_end = N + start;
2578   }    CHKMEMQ;
2579   for (col=col_start; col<col_end; col++){
2580     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2581     if (coloring->htype[0] == 'w') {
2582       dx = 1.0 + unorm;
2583     } else {
2584       dx  = xx[col];
2585     }
2586     if (dx == 0.0) dx = 1.0;
2587 #if !defined(PETSC_USE_COMPLEX)
2588     if (dx < umin && dx >= 0.0)      dx = umin;
2589     else if (dx < 0.0 && dx > -umin) dx = -umin;
2590 #else
2591     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2592     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2593 #endif
2594     dx               *= epsilon;
2595     vscale_array[col] = 1.0/dx;
2596   }     CHKMEMQ;
2597   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2598   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2599   if (ctype == IS_COLORING_GLOBAL){
2600     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2601     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2602   }
2603         CHKMEMQ;
2604   if (coloring->vscaleforrow) {
2605     vscaleforrow = coloring->vscaleforrow;
2606   } else {
2607     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2608   }
2609 
2610 
2611   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2612   /*
2613     Loop over each color
2614   */
2615   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2616   for (k=0; k<coloring->ncolors; k++) {
2617     coloring->currentcolor = k;
2618     for (i=0; i<bs; i++) {
2619       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2620       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2621       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2622       /*
2623 	Loop over each column associated with color
2624 	adding the perturbation to the vector w3.
2625       */
2626       for (l=0; l<coloring->ncolumns[k]; l++) {
2627 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2628 	if (coloring->htype[0] == 'w') {
2629 	  dx = 1.0 + unorm;
2630 	} else {
2631 	  dx  = xx[col];
2632 	}
2633 	if (dx == 0.0) dx = 1.0;
2634 #if !defined(PETSC_USE_COMPLEX)
2635 	if (dx < umin && dx >= 0.0)      dx = umin;
2636 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2637 #else
2638 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2639 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2640 #endif
2641 	dx            *= epsilon;
2642 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2643 	w3_array[col] += dx;
2644       }
2645       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2646       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2647 
2648       /*
2649 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2650 	w2 = F(x1 + dx) - F(x1)
2651       */
2652       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2653       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2654       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2655       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2656 
2657       /*
2658 	Loop over rows of vector, putting results into Jacobian matrix
2659       */
2660       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2661       for (l=0; l<coloring->nrows[k]; l++) {
2662 	row    = bs*coloring->rows[k][l];             /* local row index */
2663 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2664         for (j=0; j<bs; j++) {
2665   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2666           srows[j]  = row + start + j;
2667         }
2668 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2669       }
2670       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2671     }
2672   } /* endof for each color */
2673   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2674   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2675   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2676   ierr = PetscFree(srows);CHKERRQ(ierr);
2677 
2678   coloring->currentcolor = -1;
2679   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2680   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2681   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2682   PetscFunctionReturn(0);
2683 }
2684 
2685 /* -------------------------------------------------------------------*/
2686 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2687        MatGetRow_SeqBAIJ,
2688        MatRestoreRow_SeqBAIJ,
2689        MatMult_SeqBAIJ_N,
2690 /* 4*/ MatMultAdd_SeqBAIJ_N,
2691        MatMultTranspose_SeqBAIJ,
2692        MatMultTransposeAdd_SeqBAIJ,
2693        0,
2694        0,
2695        0,
2696 /*10*/ 0,
2697        MatLUFactor_SeqBAIJ,
2698        0,
2699        0,
2700        MatTranspose_SeqBAIJ,
2701 /*15*/ MatGetInfo_SeqBAIJ,
2702        MatEqual_SeqBAIJ,
2703        MatGetDiagonal_SeqBAIJ,
2704        MatDiagonalScale_SeqBAIJ,
2705        MatNorm_SeqBAIJ,
2706 /*20*/ 0,
2707        MatAssemblyEnd_SeqBAIJ,
2708        MatSetOption_SeqBAIJ,
2709        MatZeroEntries_SeqBAIJ,
2710 /*24*/ MatZeroRows_SeqBAIJ,
2711        0,
2712        0,
2713        0,
2714        0,
2715 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2716        0,
2717        0,
2718        MatGetArray_SeqBAIJ,
2719        MatRestoreArray_SeqBAIJ,
2720 /*34*/ MatDuplicate_SeqBAIJ,
2721        0,
2722        0,
2723        MatILUFactor_SeqBAIJ,
2724        0,
2725 /*39*/ MatAXPY_SeqBAIJ,
2726        MatGetSubMatrices_SeqBAIJ,
2727        MatIncreaseOverlap_SeqBAIJ,
2728        MatGetValues_SeqBAIJ,
2729        MatCopy_SeqBAIJ,
2730 /*44*/ 0,
2731        MatScale_SeqBAIJ,
2732        0,
2733        0,
2734        0,
2735 /*49*/ MatSetBlockSize_SeqBAIJ,
2736        MatGetRowIJ_SeqBAIJ,
2737        MatRestoreRowIJ_SeqBAIJ,
2738        MatGetColumnIJ_SeqBAIJ,
2739        MatRestoreColumnIJ_SeqBAIJ,
2740 /*54*/ MatFDColoringCreate_SeqAIJ,
2741        0,
2742        0,
2743        0,
2744        MatSetValuesBlocked_SeqBAIJ,
2745 /*59*/ MatGetSubMatrix_SeqBAIJ,
2746        MatDestroy_SeqBAIJ,
2747        MatView_SeqBAIJ,
2748        0,
2749        0,
2750 /*64*/ 0,
2751        0,
2752        0,
2753        0,
2754        0,
2755 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2756        0,
2757        MatConvert_Basic,
2758        0,
2759        0,
2760 /*74*/ 0,
2761        MatFDColoringApply_BAIJ,
2762        0,
2763        0,
2764        0,
2765 /*79*/ 0,
2766        0,
2767        0,
2768        0,
2769        MatLoad_SeqBAIJ,
2770 /*84*/ 0,
2771        0,
2772        0,
2773        0,
2774        0,
2775 /*89*/ 0,
2776        0,
2777        0,
2778        0,
2779        0,
2780 /*94*/ 0,
2781        0,
2782        0,
2783        0,
2784        0,
2785 /*99*/0,
2786        0,
2787        0,
2788        0,
2789        0,
2790 /*104*/0,
2791        MatRealPart_SeqBAIJ,
2792        MatImaginaryPart_SeqBAIJ,
2793        0,
2794        0,
2795 /*109*/0,
2796        0,
2797        0,
2798        0,
2799        MatMissingDiagonal_SeqBAIJ,
2800 /*114*/0,
2801        0,
2802        0,
2803        0,
2804        0,
2805 /*119*/0,
2806        0,
2807        MatMultHermitianTranspose_SeqBAIJ,
2808        MatMultHermitianTransposeAdd_SeqBAIJ
2809 };
2810 
2811 EXTERN_C_BEGIN
2812 #undef __FUNCT__
2813 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2814 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2815 {
2816   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2817   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2818   PetscErrorCode ierr;
2819 
2820   PetscFunctionBegin;
2821   if (aij->nonew != 1) {
2822     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2823   }
2824 
2825   /* allocate space for values if not already there */
2826   if (!aij->saved_values) {
2827     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2828     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2829   }
2830 
2831   /* copy values over */
2832   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2833   PetscFunctionReturn(0);
2834 }
2835 EXTERN_C_END
2836 
2837 EXTERN_C_BEGIN
2838 #undef __FUNCT__
2839 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2840 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2841 {
2842   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2843   PetscErrorCode ierr;
2844   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2845 
2846   PetscFunctionBegin;
2847   if (aij->nonew != 1) {
2848     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2849   }
2850   if (!aij->saved_values) {
2851     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2852   }
2853 
2854   /* copy values over */
2855   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2856   PetscFunctionReturn(0);
2857 }
2858 EXTERN_C_END
2859 
2860 EXTERN_C_BEGIN
2861 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2862 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2863 EXTERN_C_END
2864 
2865 EXTERN_C_BEGIN
2866 #undef __FUNCT__
2867 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2868 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2869 {
2870   Mat_SeqBAIJ    *b;
2871   PetscErrorCode ierr;
2872   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2873   PetscTruth     flg,skipallocation = PETSC_FALSE;
2874 
2875   PetscFunctionBegin;
2876 
2877   if (nz == MAT_SKIP_ALLOCATION) {
2878     skipallocation = PETSC_TRUE;
2879     nz             = 0;
2880   }
2881 
2882   if (bs < 0) {
2883     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2884       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2885     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2886     bs   = PetscAbs(bs);
2887   }
2888   if (nnz && newbs != bs) {
2889     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2890   }
2891   bs   = newbs;
2892 
2893   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2894   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2895   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2896   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2897 
2898   B->preallocated = PETSC_TRUE;
2899 
2900   mbs  = B->rmap->n/bs;
2901   nbs  = B->cmap->n/bs;
2902   bs2  = bs*bs;
2903 
2904   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2905     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2906   }
2907 
2908   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2909   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2910   if (nnz) {
2911     for (i=0; i<mbs; i++) {
2912       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2913       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2914     }
2915   }
2916 
2917   b       = (Mat_SeqBAIJ*)B->data;
2918   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2919     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2920   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2921 
2922   if (!flg) {
2923     switch (bs) {
2924     case 1:
2925       B->ops->mult            = MatMult_SeqBAIJ_1;
2926       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2927       B->ops->sor             = MatSOR_SeqBAIJ_1;
2928       break;
2929     case 2:
2930       B->ops->mult            = MatMult_SeqBAIJ_2;
2931       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2932       B->ops->sor             = MatSOR_SeqBAIJ_2;
2933       break;
2934     case 3:
2935       B->ops->mult            = MatMult_SeqBAIJ_3;
2936       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2937       B->ops->sor             = MatSOR_SeqBAIJ_3;
2938       break;
2939     case 4:
2940       B->ops->mult            = MatMult_SeqBAIJ_4;
2941       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2942       B->ops->sor             = MatSOR_SeqBAIJ_4;
2943       break;
2944     case 5:
2945       B->ops->mult            = MatMult_SeqBAIJ_5;
2946       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2947       B->ops->sor             = MatSOR_SeqBAIJ_5;
2948       break;
2949     case 6:
2950       B->ops->mult            = MatMult_SeqBAIJ_6;
2951       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2952       B->ops->sor             = MatSOR_SeqBAIJ_6;
2953       break;
2954     case 7:
2955       B->ops->mult            = MatMult_SeqBAIJ_7;
2956       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2957       B->ops->sor             = MatSOR_SeqBAIJ_7;
2958       break;
2959     default:
2960       B->ops->mult            = MatMult_SeqBAIJ_N;
2961       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2962       break;
2963     }
2964   }
2965   B->rmap->bs      = bs;
2966   b->mbs     = mbs;
2967   b->nbs     = nbs;
2968   if (!skipallocation) {
2969     if (!b->imax) {
2970       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2971       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2972       b->free_imax_ilen = PETSC_TRUE;
2973     }
2974     /* b->ilen will count nonzeros in each block row so far. */
2975     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2976     if (!nnz) {
2977       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2978       else if (nz <= 0)        nz = 1;
2979       for (i=0; i<mbs; i++) b->imax[i] = nz;
2980       nz = nz*mbs;
2981     } else {
2982       nz = 0;
2983       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2984     }
2985 
2986     /* allocate the matrix space */
2987     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2988     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2989     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2990     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2991     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2992     b->singlemalloc = PETSC_TRUE;
2993     b->i[0] = 0;
2994     for (i=1; i<mbs+1; i++) {
2995       b->i[i] = b->i[i-1] + b->imax[i-1];
2996     }
2997     b->free_a     = PETSC_TRUE;
2998     b->free_ij    = PETSC_TRUE;
2999   } else {
3000     b->free_a     = PETSC_FALSE;
3001     b->free_ij    = PETSC_FALSE;
3002   }
3003 
3004   B->rmap->bs          = bs;
3005   b->bs2              = bs2;
3006   b->mbs              = mbs;
3007   b->nz               = 0;
3008   b->maxnz            = nz*bs2;
3009   B->info.nz_unneeded = (PetscReal)b->maxnz;
3010   PetscFunctionReturn(0);
3011 }
3012 EXTERN_C_END
3013 
3014 EXTERN_C_BEGIN
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3017 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3018 {
3019   PetscInt       i,m,nz,nz_max=0,*nnz;
3020   PetscScalar    *values=0;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024 
3025   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3026 
3027   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3028   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3029   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3030   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3031   m = B->rmap->n/bs;
3032 
3033   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3034   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3035   for(i=0; i<m; i++) {
3036     nz = ii[i+1]- ii[i];
3037     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3038     nz_max = PetscMax(nz_max, nz);
3039     nnz[i] = nz;
3040   }
3041   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3042   ierr = PetscFree(nnz);CHKERRQ(ierr);
3043 
3044   values = (PetscScalar*)V;
3045   if (!values) {
3046     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3047     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3048   }
3049   for (i=0; i<m; i++) {
3050     PetscInt          ncols  = ii[i+1] - ii[i];
3051     const PetscInt    *icols = jj + ii[i];
3052     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3053     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3054   }
3055   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3056   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3057   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3058 
3059   PetscFunctionReturn(0);
3060 }
3061 EXTERN_C_END
3062 
3063 
3064 EXTERN_C_BEGIN
3065 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3066 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3067 EXTERN_C_END
3068 
3069 /*MC
3070    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3071    block sparse compressed row format.
3072 
3073    Options Database Keys:
3074 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3075 
3076   Level: beginner
3077 
3078 .seealso: MatCreateSeqBAIJ()
3079 M*/
3080 
3081 
3082 EXTERN_C_BEGIN
3083 #undef __FUNCT__
3084 #define __FUNCT__ "MatCreate_SeqBAIJ"
3085 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3086 {
3087   PetscErrorCode ierr;
3088   PetscMPIInt    size;
3089   Mat_SeqBAIJ    *b;
3090 
3091   PetscFunctionBegin;
3092   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3093   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3094 
3095   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3096   B->data = (void*)b;
3097   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3098   B->mapping               = 0;
3099   b->row                   = 0;
3100   b->col                   = 0;
3101   b->icol                  = 0;
3102   b->reallocs              = 0;
3103   b->saved_values          = 0;
3104 
3105   b->roworiented           = PETSC_TRUE;
3106   b->nonew                 = 0;
3107   b->diag                  = 0;
3108   b->solve_work            = 0;
3109   b->mult_work             = 0;
3110   B->spptr                 = 0;
3111   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3112   b->keepnonzeropattern    = PETSC_FALSE;
3113   b->xtoy                  = 0;
3114   b->XtoY                  = 0;
3115   b->compressedrow.use     = PETSC_FALSE;
3116   b->compressedrow.nrows   = 0;
3117   b->compressedrow.i       = PETSC_NULL;
3118   b->compressedrow.rindex  = PETSC_NULL;
3119   b->compressedrow.checked = PETSC_FALSE;
3120   B->same_nonzero          = PETSC_FALSE;
3121 
3122   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3123                                      "MatGetFactorAvailable_seqbaij_petsc",
3124                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3126                                      "MatGetFactor_seqbaij_petsc",
3127                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3129                                      "MatInvertBlockDiagonal_SeqBAIJ",
3130                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3132                                      "MatStoreValues_SeqBAIJ",
3133                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3135                                      "MatRetrieveValues_SeqBAIJ",
3136                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3138                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3139                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3140   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3141                                      "MatConvert_SeqBAIJ_SeqAIJ",
3142                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3144                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3145                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3147                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3148                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3150                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3151                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3152   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155 EXTERN_C_END
3156 
3157 #undef __FUNCT__
3158 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3159 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3160 {
3161   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3162   PetscErrorCode ierr;
3163   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3164 
3165   PetscFunctionBegin;
3166   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3167 
3168   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3169     c->imax = a->imax;
3170     c->ilen = a->ilen;
3171     c->free_imax_ilen = PETSC_FALSE;
3172   } else {
3173     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3174     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3175     for (i=0; i<mbs; i++) {
3176       c->imax[i] = a->imax[i];
3177       c->ilen[i] = a->ilen[i];
3178     }
3179     c->free_imax_ilen = PETSC_TRUE;
3180   }
3181 
3182   /* allocate the matrix space */
3183   if (mallocmatspace){
3184     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3185       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3186       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3187       c->singlemalloc = PETSC_FALSE;
3188       c->free_ij      = PETSC_FALSE;
3189       c->i            = a->i;
3190       c->j            = a->j;
3191       c->parent       = A;
3192       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3193       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3194       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3195     } else {
3196       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3197       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3198       c->singlemalloc = PETSC_TRUE;
3199       c->free_ij      = PETSC_TRUE;
3200       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3201       if (mbs > 0) {
3202 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3203 	if (cpvalues == MAT_COPY_VALUES) {
3204 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3205 	} else {
3206 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3207 	}
3208       }
3209     }
3210   }
3211 
3212   c->roworiented = a->roworiented;
3213   c->nonew       = a->nonew;
3214   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3215   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3216   c->bs2         = a->bs2;
3217   c->mbs         = a->mbs;
3218   c->nbs         = a->nbs;
3219 
3220   if (a->diag) {
3221     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3222       c->diag      = a->diag;
3223       c->free_diag = PETSC_FALSE;
3224     } else {
3225       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3226       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3227       for (i=0; i<mbs; i++) {
3228         c->diag[i] = a->diag[i];
3229       }
3230       c->free_diag = PETSC_TRUE;
3231     }
3232   } else c->diag        = 0;
3233   c->nz                 = a->nz;
3234   c->maxnz              = a->maxnz;
3235   c->solve_work         = 0;
3236   c->mult_work          = 0;
3237   c->free_a             = PETSC_TRUE;
3238   c->free_ij            = PETSC_TRUE;
3239   C->preallocated       = PETSC_TRUE;
3240   C->assembled          = PETSC_TRUE;
3241 
3242   c->compressedrow.use     = a->compressedrow.use;
3243   c->compressedrow.nrows   = a->compressedrow.nrows;
3244   c->compressedrow.checked = a->compressedrow.checked;
3245   if (a->compressedrow.checked && a->compressedrow.use){
3246     i = a->compressedrow.nrows;
3247     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3248     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3249     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3250     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3251   } else {
3252     c->compressedrow.use    = PETSC_FALSE;
3253     c->compressedrow.i      = PETSC_NULL;
3254     c->compressedrow.rindex = PETSC_NULL;
3255   }
3256   C->same_nonzero = A->same_nonzero;
3257   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3258   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3259   PetscFunctionReturn(0);
3260 }
3261 
3262 #undef __FUNCT__
3263 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3264 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3265 {
3266     PetscErrorCode ierr;
3267 
3268   PetscFunctionBegin;
3269   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3270   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3271   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3272   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3273   PetscFunctionReturn(0);
3274 }
3275 
3276 #undef __FUNCT__
3277 #define __FUNCT__ "MatLoad_SeqBAIJ"
3278 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3279 {
3280   Mat_SeqBAIJ    *a;
3281   Mat            B;
3282   PetscErrorCode ierr;
3283   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3284   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3285   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3286   PetscInt       *masked,nmask,tmp,bs2,ishift;
3287   PetscMPIInt    size;
3288   int            fd;
3289   PetscScalar    *aa;
3290   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3291 
3292   PetscFunctionBegin;
3293   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3294     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3295   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3296   bs2  = bs*bs;
3297 
3298   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3299   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3300   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3301   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3302   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3303   M = header[1]; N = header[2]; nz = header[3];
3304 
3305   if (header[3] < 0) {
3306     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3307   }
3308 
3309   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3310 
3311   /*
3312      This code adds extra rows to make sure the number of rows is
3313     divisible by the blocksize
3314   */
3315   mbs        = M/bs;
3316   extra_rows = bs - M + bs*(mbs);
3317   if (extra_rows == bs) extra_rows = 0;
3318   else                  mbs++;
3319   if (extra_rows) {
3320     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3321   }
3322 
3323   /* read in row lengths */
3324   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3325   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3326   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3327 
3328   /* read in column indices */
3329   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3330   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3331   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3332 
3333   /* loop over row lengths determining block row lengths */
3334   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3335   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3336   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3337   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3338   rowcount = 0;
3339   nzcount = 0;
3340   for (i=0; i<mbs; i++) {
3341     nmask = 0;
3342     for (j=0; j<bs; j++) {
3343       kmax = rowlengths[rowcount];
3344       for (k=0; k<kmax; k++) {
3345         tmp = jj[nzcount++]/bs;
3346         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3347       }
3348       rowcount++;
3349     }
3350     browlengths[i] += nmask;
3351     /* zero out the mask elements we set */
3352     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3353   }
3354 
3355   /* create our matrix */
3356   ierr = MatCreate(comm,&B);
3357   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3358   ierr = MatSetType(B,type);CHKERRQ(ierr);
3359   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3360   a = (Mat_SeqBAIJ*)B->data;
3361 
3362   /* set matrix "i" values */
3363   a->i[0] = 0;
3364   for (i=1; i<= mbs; i++) {
3365     a->i[i]      = a->i[i-1] + browlengths[i-1];
3366     a->ilen[i-1] = browlengths[i-1];
3367   }
3368   a->nz         = 0;
3369   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3370 
3371   /* read in nonzero values */
3372   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3373   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3374   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3375 
3376   /* set "a" and "j" values into matrix */
3377   nzcount = 0; jcount = 0;
3378   for (i=0; i<mbs; i++) {
3379     nzcountb = nzcount;
3380     nmask    = 0;
3381     for (j=0; j<bs; j++) {
3382       kmax = rowlengths[i*bs+j];
3383       for (k=0; k<kmax; k++) {
3384         tmp = jj[nzcount++]/bs;
3385 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3386       }
3387     }
3388     /* sort the masked values */
3389     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3390 
3391     /* set "j" values into matrix */
3392     maskcount = 1;
3393     for (j=0; j<nmask; j++) {
3394       a->j[jcount++]  = masked[j];
3395       mask[masked[j]] = maskcount++;
3396     }
3397     /* set "a" values into matrix */
3398     ishift = bs2*a->i[i];
3399     for (j=0; j<bs; j++) {
3400       kmax = rowlengths[i*bs+j];
3401       for (k=0; k<kmax; k++) {
3402         tmp       = jj[nzcountb]/bs ;
3403         block     = mask[tmp] - 1;
3404         point     = jj[nzcountb] - bs*tmp;
3405         idx       = ishift + bs2*block + j + bs*point;
3406         a->a[idx] = (MatScalar)aa[nzcountb++];
3407       }
3408     }
3409     /* zero out the mask elements we set */
3410     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3411   }
3412   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3413 
3414   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3415   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3416   ierr = PetscFree(aa);CHKERRQ(ierr);
3417   ierr = PetscFree(jj);CHKERRQ(ierr);
3418   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3419 
3420   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3421   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3422   ierr = MatView_Private(B);CHKERRQ(ierr);
3423 
3424   *A = B;
3425   PetscFunctionReturn(0);
3426 }
3427 
3428 #undef __FUNCT__
3429 #define __FUNCT__ "MatCreateSeqBAIJ"
3430 /*@C
3431    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3432    compressed row) format.  For good matrix assembly performance the
3433    user should preallocate the matrix storage by setting the parameter nz
3434    (or the array nnz).  By setting these parameters accurately, performance
3435    during matrix assembly can be increased by more than a factor of 50.
3436 
3437    Collective on MPI_Comm
3438 
3439    Input Parameters:
3440 +  comm - MPI communicator, set to PETSC_COMM_SELF
3441 .  bs - size of block
3442 .  m - number of rows
3443 .  n - number of columns
3444 .  nz - number of nonzero blocks  per block row (same for all rows)
3445 -  nnz - array containing the number of nonzero blocks in the various block rows
3446          (possibly different for each block row) or PETSC_NULL
3447 
3448    Output Parameter:
3449 .  A - the matrix
3450 
3451    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3452    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3453    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3454 
3455    Options Database Keys:
3456 .   -mat_no_unroll - uses code that does not unroll the loops in the
3457                      block calculations (much slower)
3458 .    -mat_block_size - size of the blocks to use
3459 
3460    Level: intermediate
3461 
3462    Notes:
3463    The number of rows and columns must be divisible by blocksize.
3464 
3465    If the nnz parameter is given then the nz parameter is ignored
3466 
3467    A nonzero block is any block that as 1 or more nonzeros in it
3468 
3469    The block AIJ format is fully compatible with standard Fortran 77
3470    storage.  That is, the stored row and column indices can begin at
3471    either one (as in Fortran) or zero.  See the users' manual for details.
3472 
3473    Specify the preallocated storage with either nz or nnz (not both).
3474    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3475    allocation.  For additional details, see the users manual chapter on
3476    matrices.
3477 
3478 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3479 @*/
3480 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3481 {
3482   PetscErrorCode ierr;
3483 
3484   PetscFunctionBegin;
3485   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3486   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3487   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3488   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3494 /*@C
3495    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3496    per row in the matrix. For good matrix assembly performance the
3497    user should preallocate the matrix storage by setting the parameter nz
3498    (or the array nnz).  By setting these parameters accurately, performance
3499    during matrix assembly can be increased by more than a factor of 50.
3500 
3501    Collective on MPI_Comm
3502 
3503    Input Parameters:
3504 +  A - the matrix
3505 .  bs - size of block
3506 .  nz - number of block nonzeros per block row (same for all rows)
3507 -  nnz - array containing the number of block nonzeros in the various block rows
3508          (possibly different for each block row) or PETSC_NULL
3509 
3510    Options Database Keys:
3511 .   -mat_no_unroll - uses code that does not unroll the loops in the
3512                      block calculations (much slower)
3513 .    -mat_block_size - size of the blocks to use
3514 
3515    Level: intermediate
3516 
3517    Notes:
3518    If the nnz parameter is given then the nz parameter is ignored
3519 
3520    You can call MatGetInfo() to get information on how effective the preallocation was;
3521    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3522    You can also run with the option -info and look for messages with the string
3523    malloc in them to see if additional memory allocation was needed.
3524 
3525    The block AIJ format is fully compatible with standard Fortran 77
3526    storage.  That is, the stored row and column indices can begin at
3527    either one (as in Fortran) or zero.  See the users' manual for details.
3528 
3529    Specify the preallocated storage with either nz or nnz (not both).
3530    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3531    allocation.  For additional details, see the users manual chapter on
3532    matrices.
3533 
3534 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3535 @*/
3536 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3537 {
3538   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3539 
3540   PetscFunctionBegin;
3541   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3542   if (f) {
3543     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3544   }
3545   PetscFunctionReturn(0);
3546 }
3547 
3548 #undef __FUNCT__
3549 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3550 /*@C
3551    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3552    (the default sequential PETSc format).
3553 
3554    Collective on MPI_Comm
3555 
3556    Input Parameters:
3557 +  A - the matrix
3558 .  i - the indices into j for the start of each local row (starts with zero)
3559 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3560 -  v - optional values in the matrix
3561 
3562    Level: developer
3563 
3564 .keywords: matrix, aij, compressed row, sparse
3565 
3566 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3567 @*/
3568 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3569 {
3570   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3571 
3572   PetscFunctionBegin;
3573   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3574   if (f) {
3575     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3576   }
3577   PetscFunctionReturn(0);
3578 }
3579 
3580 
3581 #undef __FUNCT__
3582 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3583 /*@
3584      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3585               (upper triangular entries in CSR format) provided by the user.
3586 
3587      Collective on MPI_Comm
3588 
3589    Input Parameters:
3590 +  comm - must be an MPI communicator of size 1
3591 .  bs - size of block
3592 .  m - number of rows
3593 .  n - number of columns
3594 .  i - row indices
3595 .  j - column indices
3596 -  a - matrix values
3597 
3598    Output Parameter:
3599 .  mat - the matrix
3600 
3601    Level: intermediate
3602 
3603    Notes:
3604        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3605     once the matrix is destroyed
3606 
3607        You cannot set new nonzero locations into this matrix, that will generate an error.
3608 
3609        The i and j indices are 0 based
3610 
3611 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3612 
3613 @*/
3614 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3615 {
3616   PetscErrorCode ierr;
3617   PetscInt       ii;
3618   Mat_SeqBAIJ    *baij;
3619 
3620   PetscFunctionBegin;
3621   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3622   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3623 
3624   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3625   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3626   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3627   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3628   baij = (Mat_SeqBAIJ*)(*mat)->data;
3629   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3630   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3631 
3632   baij->i = i;
3633   baij->j = j;
3634   baij->a = a;
3635   baij->singlemalloc = PETSC_FALSE;
3636   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3637   baij->free_a       = PETSC_FALSE;
3638   baij->free_ij       = PETSC_FALSE;
3639 
3640   for (ii=0; ii<m; ii++) {
3641     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3642 #if defined(PETSC_USE_DEBUG)
3643     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3644 #endif
3645   }
3646 #if defined(PETSC_USE_DEBUG)
3647   for (ii=0; ii<baij->i[m]; ii++) {
3648     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3649     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3650   }
3651 #endif
3652 
3653   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   PetscFunctionReturn(0);
3656 }
3657