xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a201590f66d78a0ee8e1bf4e6f9fcce39f32a809)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       *tia, *tja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (ja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353 	    }
1354           }
1355         }
1356       }
1357     }
1358 
1359     n     *= bs;
1360     nz *= bs*bs;
1361     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362       ierr = PetscFree(tia);CHKERRQ(ierr);
1363       ierr = PetscFree(tja);CHKERRQ(ierr);
1364     }
1365   } else if (oshift == 1) {
1366     if (symmetric) {
1367       PetscInt nz = tia[A->rmap->n/bs];
1368       /*  add 1 to i and j indices */
1369       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370       *ia = tia;
1371       if (ja) {
1372 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373         *ja = tja;
1374       }
1375     } else {
1376       PetscInt nz = a->i[A->rmap->n/bs];
1377       /* malloc space and  add 1 to i and j indices */
1378       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1379       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380       if (ja) {
1381 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1382 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383       }
1384     }
1385   } else {
1386     *ia = tia;
1387     if (ja) *ja = tja;
1388   }
1389 
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!ia) PetscFunctionReturn(0);
1401   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402     ierr = PetscFree(*ia);CHKERRQ(ierr);
1403     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411 {
1412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416 #if defined(PETSC_USE_LOG)
1417   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418 #endif
1419   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1420   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1421   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1422   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1423   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1424   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1425   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1426   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1427   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1428   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1429   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1430   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1431   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1432 
1433   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1434   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1435   ierr = PetscFree(A->data);CHKERRQ(ierr);
1436 
1437   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1454 {
1455   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   switch (op) {
1460   case MAT_ROW_ORIENTED:
1461     a->roworiented    = flg;
1462     break;
1463   case MAT_KEEP_NONZERO_PATTERN:
1464     a->keepnonzeropattern = flg;
1465     break;
1466   case MAT_NEW_NONZERO_LOCATIONS:
1467     a->nonew          = (flg ? 0 : 1);
1468     break;
1469   case MAT_NEW_NONZERO_LOCATION_ERR:
1470     a->nonew          = (flg ? -1 : 0);
1471     break;
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473     a->nonew          = (flg ? -2 : 0);
1474     break;
1475   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476     a->nounused       = (flg ? -1 : 0);
1477     break;
1478   case MAT_CHECK_COMPRESSED_ROW:
1479     a->compressedrow.check = flg;
1480     break;
1481   case MAT_NEW_DIAGONALS:
1482   case MAT_IGNORE_OFF_PROC_ENTRIES:
1483   case MAT_USE_HASH_TABLE:
1484     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1485     break;
1486   case MAT_SYMMETRIC:
1487   case MAT_STRUCTURALLY_SYMMETRIC:
1488   case MAT_HERMITIAN:
1489   case MAT_SYMMETRY_ETERNAL:
1490     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1491     break;
1492   default:
1493     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1500 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1501 {
1502   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode ierr;
1504   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1505   MatScalar      *aa,*aa_i;
1506   PetscScalar    *v_i;
1507 
1508   PetscFunctionBegin;
1509   bs  = A->rmap->bs;
1510   ai  = a->i;
1511   aj  = a->j;
1512   aa  = a->a;
1513   bs2 = a->bs2;
1514 
1515   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1516 
1517   bn  = row/bs;   /* Block number */
1518   bp  = row % bs; /* Block Position */
1519   M   = ai[bn+1] - ai[bn];
1520   *nz = bs*M;
1521 
1522   if (v) {
1523     *v = 0;
1524     if (*nz) {
1525       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1526       for (i=0; i<M; i++) { /* for each block in the block row */
1527         v_i  = *v + i*bs;
1528         aa_i = aa + bs2*(ai[bn] + i);
1529         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1530       }
1531     }
1532   }
1533 
1534   if (idx) {
1535     *idx = 0;
1536     if (*nz) {
1537       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1538       for (i=0; i<M; i++) { /* for each block in the block row */
1539         idx_i = *idx + i*bs;
1540         itmp  = bs*aj[ai[bn] + i];
1541         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1542       }
1543     }
1544   }
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1550 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551 {
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1556   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1564 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1565 {
1566   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1567   Mat            C;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1570   PetscInt       *rows,*cols,bs2=a->bs2;
1571   MatScalar      *array;
1572 
1573   PetscFunctionBegin;
1574   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1576     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1577     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1578 
1579     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1580     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1581     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1582     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1583     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1584     ierr = PetscFree(col);CHKERRQ(ierr);
1585   } else {
1586     C = *B;
1587   }
1588 
1589   array = a->a;
1590   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1591   for (i=0; i<mbs; i++) {
1592     cols[0] = i*bs;
1593     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1594     len = ai[i+1] - ai[i];
1595     for (j=0; j<len; j++) {
1596       rows[0] = (*aj++)*bs;
1597       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1598       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1599       array += bs2;
1600     }
1601   }
1602   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1603 
1604   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1605   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606 
1607   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1608     *B = C;
1609   } else {
1610     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1618 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1619 {
1620   PetscErrorCode ierr;
1621   Mat            Btrans;
1622 
1623   PetscFunctionBegin;
1624   *f = PETSC_FALSE;
1625   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1626   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1627   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1628   PetscFunctionReturn(0);
1629 }
1630 EXTERN_C_END
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1634 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1635 {
1636   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1637   PetscErrorCode ierr;
1638   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1639   int            fd;
1640   PetscScalar    *aa;
1641   FILE           *file;
1642 
1643   PetscFunctionBegin;
1644   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1645   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1646   col_lens[0] = MAT_FILE_CLASSID;
1647 
1648   col_lens[1] = A->rmap->N;
1649   col_lens[2] = A->cmap->n;
1650   col_lens[3] = a->nz*bs2;
1651 
1652   /* store lengths of each row and write (including header) to file */
1653   count = 0;
1654   for (i=0; i<a->mbs; i++) {
1655     for (j=0; j<bs; j++) {
1656       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1657     }
1658   }
1659   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1660   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1661 
1662   /* store column indices (zero start index) */
1663   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1664   count = 0;
1665   for (i=0; i<a->mbs; i++) {
1666     for (j=0; j<bs; j++) {
1667       for (k=a->i[i]; k<a->i[i+1]; k++) {
1668         for (l=0; l<bs; l++) {
1669           jj[count++] = bs*a->j[k] + l;
1670         }
1671       }
1672     }
1673   }
1674   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1675   ierr = PetscFree(jj);CHKERRQ(ierr);
1676 
1677   /* store nonzero values */
1678   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1679   count = 0;
1680   for (i=0; i<a->mbs; i++) {
1681     for (j=0; j<bs; j++) {
1682       for (k=a->i[i]; k<a->i[i+1]; k++) {
1683         for (l=0; l<bs; l++) {
1684           aa[count++] = a->a[bs2*k + l*bs + j];
1685         }
1686       }
1687     }
1688   }
1689   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1690   ierr = PetscFree(aa);CHKERRQ(ierr);
1691 
1692   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1693   if (file) {
1694     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1701 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1702 {
1703   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1704   PetscErrorCode    ierr;
1705   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1706   PetscViewerFormat format;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1710   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1711     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1712   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1713     Mat aij;
1714     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1715     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1716     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1717   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1718      PetscFunctionReturn(0);
1719   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1720     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1721     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1722     for (i=0; i<a->mbs; i++) {
1723       for (j=0; j<bs; j++) {
1724         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1725         for (k=a->i[i]; k<a->i[i+1]; k++) {
1726           for (l=0; l<bs; l++) {
1727 #if defined(PETSC_USE_COMPLEX)
1728             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1729               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1730                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1731             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1733                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             }
1737 #else
1738             if (a->a[bs2*k + l*bs + j] != 0.0) {
1739               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1740             }
1741 #endif
1742           }
1743         }
1744         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1745       }
1746     }
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1748   } else {
1749     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1750     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1751     for (i=0; i<a->mbs; i++) {
1752       for (j=0; j<bs; j++) {
1753         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1754         for (k=a->i[i]; k<a->i[i+1]; k++) {
1755           for (l=0; l<bs; l++) {
1756 #if defined(PETSC_USE_COMPLEX)
1757             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1758               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1759                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1760             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1761               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1762                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             } else {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1765             }
1766 #else
1767             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1768 #endif
1769           }
1770         }
1771         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1772       }
1773     }
1774     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1775   }
1776   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1782 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1783 {
1784   Mat            A = (Mat) Aa;
1785   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1786   PetscErrorCode ierr;
1787   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1788   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1789   MatScalar      *aa;
1790   PetscViewer    viewer;
1791   PetscViewerFormat format;
1792 
1793   PetscFunctionBegin;
1794   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1795   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1796 
1797   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1798 
1799   /* loop over matrix elements drawing boxes */
1800 
1801   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1802     color = PETSC_DRAW_BLUE;
1803     for (i=0,row=0; i<mbs; i++,row+=bs) {
1804       for (j=a->i[i]; j<a->i[i+1]; j++) {
1805         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807         aa = a->a + j*bs2;
1808         for (k=0; k<bs; k++) {
1809           for (l=0; l<bs; l++) {
1810             if (PetscRealPart(*aa++) >=  0.) continue;
1811             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1812           }
1813         }
1814       }
1815     }
1816     color = PETSC_DRAW_CYAN;
1817     for (i=0,row=0; i<mbs; i++,row+=bs) {
1818       for (j=a->i[i]; j<a->i[i+1]; j++) {
1819         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821         aa = a->a + j*bs2;
1822         for (k=0; k<bs; k++) {
1823           for (l=0; l<bs; l++) {
1824             if (PetscRealPart(*aa++) != 0.) continue;
1825             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1826           }
1827         }
1828       }
1829     }
1830     color = PETSC_DRAW_RED;
1831     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835         aa = a->a + j*bs2;
1836         for (k=0; k<bs; k++) {
1837           for (l=0; l<bs; l++) {
1838             if (PetscRealPart(*aa++) <= 0.) continue;
1839             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840           }
1841         }
1842       }
1843     }
1844   } else {
1845     /* use contour shading to indicate magnitude of values */
1846     /* first determine max of all nonzero values */
1847     PetscDraw   popup;
1848     PetscReal scale,maxv = 0.0;
1849 
1850     for (i=0; i<a->nz*a->bs2; i++) {
1851       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1852     }
1853     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1854     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1855     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1856     for (i=0,row=0; i<mbs; i++,row+=bs) {
1857       for (j=a->i[i]; j<a->i[i+1]; j++) {
1858         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1859         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1860         aa = a->a + j*bs2;
1861         for (k=0; k<bs; k++) {
1862           for (l=0; l<bs; l++) {
1863             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1864             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1865           }
1866         }
1867       }
1868     }
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1875 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1876 {
1877   PetscErrorCode ierr;
1878   PetscReal      xl,yl,xr,yr,w,h;
1879   PetscDraw      draw;
1880   PetscBool      isnull;
1881 
1882   PetscFunctionBegin;
1883 
1884   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1885   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1886 
1887   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1888   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1889   xr += w;    yr += h;  xl = -w;     yl = -h;
1890   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1891   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1892   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatView_SeqBAIJ"
1898 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1899 {
1900   PetscErrorCode ierr;
1901   PetscBool      iascii,isbinary,isdraw;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1905   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1906   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1907   if (iascii){
1908     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1909   } else if (isbinary) {
1910     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1911   } else if (isdraw) {
1912     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1913   } else {
1914     Mat B;
1915     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1916     ierr = MatView(B,viewer);CHKERRQ(ierr);
1917     ierr = MatDestroy(&B);CHKERRQ(ierr);
1918   }
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1925 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1926 {
1927   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1929   PetscInt    *ai = a->i,*ailen = a->ilen;
1930   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1931   MatScalar   *ap,*aa = a->a;
1932 
1933   PetscFunctionBegin;
1934   for (k=0; k<m; k++) { /* loop over rows */
1935     row  = im[k]; brow = row/bs;
1936     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1937     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1938     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1939     nrow = ailen[brow];
1940     for (l=0; l<n; l++) { /* loop over columns */
1941       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1942       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1943       col  = in[l] ;
1944       bcol = col/bs;
1945       cidx = col%bs;
1946       ridx = row%bs;
1947       high = nrow;
1948       low  = 0; /* assume unsorted */
1949       while (high-low > 5) {
1950         t = (low+high)/2;
1951         if (rp[t] > bcol) high = t;
1952         else             low  = t;
1953       }
1954       for (i=low; i<high; i++) {
1955         if (rp[i] > bcol) break;
1956         if (rp[i] == bcol) {
1957           *v++ = ap[bs2*i+bs*cidx+ridx];
1958           goto finished;
1959         }
1960       }
1961       *v++ = 0.0;
1962       finished:;
1963     }
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1970 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1971 {
1972   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1973   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1974   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1975   PetscErrorCode    ierr;
1976   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1977   PetscBool         roworiented=a->roworiented;
1978   const PetscScalar *value = v;
1979   MatScalar         *ap,*aa = a->a,*bap;
1980 
1981   PetscFunctionBegin;
1982   if (roworiented) {
1983     stepval = (n-1)*bs;
1984   } else {
1985     stepval = (m-1)*bs;
1986   }
1987   for (k=0; k<m; k++) { /* loop over added rows */
1988     row  = im[k];
1989     if (row < 0) continue;
1990 #if defined(PETSC_USE_DEBUG)
1991     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1992 #endif
1993     rp   = aj + ai[row];
1994     ap   = aa + bs2*ai[row];
1995     rmax = imax[row];
1996     nrow = ailen[row];
1997     low  = 0;
1998     high = nrow;
1999     for (l=0; l<n; l++) { /* loop over added columns */
2000       if (in[l] < 0) continue;
2001 #if defined(PETSC_USE_DEBUG)
2002       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2003 #endif
2004       col = in[l];
2005       if (roworiented) {
2006         value = v + (k*(stepval+bs) + l)*bs;
2007       } else {
2008         value = v + (l*(stepval+bs) + k)*bs;
2009       }
2010       if (col <= lastcol) low = 0; else high = nrow;
2011       lastcol = col;
2012       while (high-low > 7) {
2013         t = (low+high)/2;
2014         if (rp[t] > col) high = t;
2015         else             low  = t;
2016       }
2017       for (i=low; i<high; i++) {
2018         if (rp[i] > col) break;
2019         if (rp[i] == col) {
2020           bap  = ap +  bs2*i;
2021           if (roworiented) {
2022             if (is == ADD_VALUES) {
2023               for (ii=0; ii<bs; ii++,value+=stepval) {
2024                 for (jj=ii; jj<bs2; jj+=bs) {
2025                   bap[jj] += *value++;
2026                 }
2027               }
2028             } else {
2029               for (ii=0; ii<bs; ii++,value+=stepval) {
2030                 for (jj=ii; jj<bs2; jj+=bs) {
2031                   bap[jj] = *value++;
2032                 }
2033               }
2034             }
2035           } else {
2036             if (is == ADD_VALUES) {
2037               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038                 for (jj=0; jj<bs; jj++) {
2039                   bap[jj] += value[jj];
2040                 }
2041                 bap += bs;
2042               }
2043             } else {
2044               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2045                 for (jj=0; jj<bs; jj++) {
2046                   bap[jj]  = value[jj];
2047                 }
2048                 bap += bs;
2049               }
2050             }
2051           }
2052           goto noinsert2;
2053         }
2054       }
2055       if (nonew == 1) goto noinsert2;
2056       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2057       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2058       N = nrow++ - 1; high++;
2059       /* shift up all the later entries in this row */
2060       for (ii=N; ii>=i; ii--) {
2061         rp[ii+1] = rp[ii];
2062         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2063       }
2064       if (N >= i) {
2065         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2066       }
2067       rp[i] = col;
2068       bap   = ap +  bs2*i;
2069       if (roworiented) {
2070         for (ii=0; ii<bs; ii++,value+=stepval) {
2071           for (jj=ii; jj<bs2; jj+=bs) {
2072             bap[jj] = *value++;
2073           }
2074         }
2075       } else {
2076         for (ii=0; ii<bs; ii++,value+=stepval) {
2077           for (jj=0; jj<bs; jj++) {
2078             *bap++  = *value++;
2079           }
2080         }
2081       }
2082       noinsert2:;
2083       low = i;
2084     }
2085     ailen[row] = nrow;
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2092 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2093 {
2094   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2095   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2096   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2097   PetscErrorCode ierr;
2098   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2099   MatScalar      *aa = a->a,*ap;
2100   PetscReal      ratio=0.6;
2101 
2102   PetscFunctionBegin;
2103   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2104 
2105   if (m) rmax = ailen[0];
2106   for (i=1; i<mbs; i++) {
2107     /* move each row back by the amount of empty slots (fshift) before it*/
2108     fshift += imax[i-1] - ailen[i-1];
2109     rmax   = PetscMax(rmax,ailen[i]);
2110     if (fshift) {
2111       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2112       N = ailen[i];
2113       for (j=0; j<N; j++) {
2114         ip[j-fshift] = ip[j];
2115         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2116       }
2117     }
2118     ai[i] = ai[i-1] + ailen[i-1];
2119   }
2120   if (mbs) {
2121     fshift += imax[mbs-1] - ailen[mbs-1];
2122     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2123   }
2124   /* reset ilen and imax for each row */
2125   for (i=0; i<mbs; i++) {
2126     ailen[i] = imax[i] = ai[i+1] - ai[i];
2127   }
2128   a->nz = ai[mbs];
2129 
2130   /* diagonals may have moved, so kill the diagonal pointers */
2131   a->idiagvalid = PETSC_FALSE;
2132   if (fshift && a->diag) {
2133     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2134     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2135     a->diag = 0;
2136   }
2137   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2138   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2139   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2140   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2141   A->info.mallocs     += a->reallocs;
2142   a->reallocs          = 0;
2143   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2144 
2145   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2146   A->same_nonzero = PETSC_TRUE;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    This function returns an array of flags which indicate the locations of contiguous
2152    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2153    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2154    Assume: sizes should be long enough to hold all the values.
2155 */
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2158 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2159 {
2160   PetscInt   i,j,k,row;
2161   PetscBool  flg;
2162 
2163   PetscFunctionBegin;
2164   for (i=0,j=0; i<n; j++) {
2165     row = idx[i];
2166     if (row%bs!=0) { /* Not the begining of a block */
2167       sizes[j] = 1;
2168       i++;
2169     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2170       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2171       i++;
2172     } else { /* Begining of the block, so check if the complete block exists */
2173       flg = PETSC_TRUE;
2174       for (k=1; k<bs; k++) {
2175         if (row+k != idx[i+k]) { /* break in the block */
2176           flg = PETSC_FALSE;
2177           break;
2178         }
2179       }
2180       if (flg) { /* No break in the bs */
2181         sizes[j] = bs;
2182         i+= bs;
2183       } else {
2184         sizes[j] = 1;
2185         i++;
2186       }
2187     }
2188   }
2189   *bs_max = j;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2195 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2196 {
2197   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2198   PetscErrorCode    ierr;
2199   PetscInt          i,j,k,count,*rows;
2200   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2201   PetscScalar       zero = 0.0;
2202   MatScalar         *aa;
2203   const PetscScalar *xx;
2204   PetscScalar       *bb;
2205 
2206   PetscFunctionBegin;
2207   /* fix right hand side if needed */
2208   if (x && b) {
2209     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2210     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2211     for (i=0; i<is_n; i++) {
2212       bb[is_idx[i]] = diag*xx[is_idx[i]];
2213     }
2214     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2215     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2216   }
2217 
2218   /* Make a copy of the IS and  sort it */
2219   /* allocate memory for rows,sizes */
2220   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2221 
2222   /* copy IS values to rows, and sort them */
2223   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2224   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2225 
2226   if (baij->keepnonzeropattern) {
2227     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2228     bs_max = is_n;
2229     A->same_nonzero = PETSC_TRUE;
2230   } else {
2231     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2232     A->same_nonzero = PETSC_FALSE;
2233   }
2234 
2235   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2236     row   = rows[j];
2237     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2238     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2239     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2240     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2241       if (diag != (PetscScalar)0.0) {
2242         if (baij->ilen[row/bs] > 0) {
2243           baij->ilen[row/bs]       = 1;
2244           baij->j[baij->i[row/bs]] = row/bs;
2245           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2246         }
2247         /* Now insert all the diagonal values for this bs */
2248         for (k=0; k<bs; k++) {
2249           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2250         }
2251       } else { /* (diag == 0.0) */
2252         baij->ilen[row/bs] = 0;
2253       } /* end (diag == 0.0) */
2254     } else { /* (sizes[i] != bs) */
2255 #if defined (PETSC_USE_DEBUG)
2256       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2257 #endif
2258       for (k=0; k<count; k++) {
2259         aa[0] =  zero;
2260         aa    += bs;
2261       }
2262       if (diag != (PetscScalar)0.0) {
2263         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2264       }
2265     }
2266   }
2267 
2268   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2269   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2275 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2276 {
2277   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2278   PetscErrorCode    ierr;
2279   PetscInt          i,j,k,count;
2280   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2281   PetscScalar       zero = 0.0;
2282   MatScalar         *aa;
2283   const PetscScalar *xx;
2284   PetscScalar       *bb;
2285   PetscBool         *zeroed,vecs = PETSC_FALSE;
2286 
2287   PetscFunctionBegin;
2288   /* fix right hand side if needed */
2289   if (x && b) {
2290     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2291     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2292     vecs = PETSC_TRUE;
2293   }
2294   A->same_nonzero = PETSC_TRUE;
2295 
2296   /* zero the columns */
2297   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2298   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2299   for (i=0; i<is_n; i++) {
2300     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2301     zeroed[is_idx[i]] = PETSC_TRUE;
2302   }
2303   for (i=0; i<A->rmap->N; i++) {
2304     if (!zeroed[i]) {
2305       row = i/bs;
2306       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2307         for (k=0; k<bs; k++) {
2308           col = bs*baij->j[j] + k;
2309 	  if (zeroed[col]) {
2310 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2311             if (vecs) bb[i] -= aa[0]*xx[col];
2312             aa[0] = 0.0;
2313           }
2314         }
2315       }
2316     } else if (vecs) bb[i] = diag*xx[i];
2317   }
2318   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2319   if (vecs) {
2320     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2321     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2322   }
2323 
2324   /* zero the rows */
2325   for (i=0; i<is_n; i++) {
2326     row   = is_idx[i];
2327     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2328     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2329     for (k=0; k<count; k++) {
2330       aa[0] =  zero;
2331       aa    += bs;
2332     }
2333     if (diag != (PetscScalar)0.0) {
2334       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2335     }
2336   }
2337   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2343 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2344 {
2345   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2346   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2347   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2348   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2349   PetscErrorCode ierr;
2350   PetscInt       ridx,cidx,bs2=a->bs2;
2351   PetscBool      roworiented=a->roworiented;
2352   MatScalar      *ap,value,*aa=a->a,*bap;
2353 
2354   PetscFunctionBegin;
2355   if (v) PetscValidScalarPointer(v,6);
2356   for (k=0; k<m; k++) { /* loop over added rows */
2357     row  = im[k];
2358     brow = row/bs;
2359     if (row < 0) continue;
2360 #if defined(PETSC_USE_DEBUG)
2361     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2362 #endif
2363     rp   = aj + ai[brow];
2364     ap   = aa + bs2*ai[brow];
2365     rmax = imax[brow];
2366     nrow = ailen[brow];
2367     low  = 0;
2368     high = nrow;
2369     for (l=0; l<n; l++) { /* loop over added columns */
2370       if (in[l] < 0) continue;
2371 #if defined(PETSC_USE_DEBUG)
2372       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2373 #endif
2374       col = in[l]; bcol = col/bs;
2375       ridx = row % bs; cidx = col % bs;
2376       if (roworiented) {
2377         value = v[l + k*n];
2378       } else {
2379         value = v[k + l*m];
2380       }
2381       if (col <= lastcol) low = 0; else high = nrow;
2382       lastcol = col;
2383       while (high-low > 7) {
2384         t = (low+high)/2;
2385         if (rp[t] > bcol) high = t;
2386         else              low  = t;
2387       }
2388       for (i=low; i<high; i++) {
2389         if (rp[i] > bcol) break;
2390         if (rp[i] == bcol) {
2391           bap  = ap +  bs2*i + bs*cidx + ridx;
2392           if (is == ADD_VALUES) *bap += value;
2393           else                  *bap  = value;
2394           goto noinsert1;
2395         }
2396       }
2397       if (nonew == 1) goto noinsert1;
2398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2399       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2400       N = nrow++ - 1; high++;
2401       /* shift up all the later entries in this row */
2402       for (ii=N; ii>=i; ii--) {
2403         rp[ii+1] = rp[ii];
2404         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2405       }
2406       if (N>=i) {
2407         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2408       }
2409       rp[i]                      = bcol;
2410       ap[bs2*i + bs*cidx + ridx] = value;
2411       a->nz++;
2412       noinsert1:;
2413       low = i;
2414     }
2415     ailen[brow] = nrow;
2416   }
2417   A->same_nonzero = PETSC_FALSE;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2423 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2426   Mat            outA;
2427   PetscErrorCode ierr;
2428   PetscBool      row_identity,col_identity;
2429 
2430   PetscFunctionBegin;
2431   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2432   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2433   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2434   if (!row_identity || !col_identity) {
2435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2436   }
2437 
2438   outA            = inA;
2439   inA->factortype = MAT_FACTOR_LU;
2440 
2441   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2442 
2443   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2444   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2445   a->row = row;
2446   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2447   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2448   a->col = col;
2449 
2450   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2451   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2452    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2453   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2454 
2455   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2456   if (!a->solve_work) {
2457     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2458     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2459   }
2460   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2461 
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 EXTERN_C_BEGIN
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2468 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2469 {
2470   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2471   PetscInt    i,nz,mbs;
2472 
2473   PetscFunctionBegin;
2474   nz  = baij->maxnz;
2475   mbs = baij->mbs;
2476   for (i=0; i<nz; i++) {
2477     baij->j[i] = indices[i];
2478   }
2479   baij->nz = nz;
2480   for (i=0; i<mbs; i++) {
2481     baij->ilen[i] = baij->imax[i];
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 EXTERN_C_END
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2489 /*@
2490     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2491        in the matrix.
2492 
2493   Input Parameters:
2494 +  mat - the SeqBAIJ matrix
2495 -  indices - the column indices
2496 
2497   Level: advanced
2498 
2499   Notes:
2500     This can be called if you have precomputed the nonzero structure of the
2501   matrix and want to provide it to the matrix object to improve the performance
2502   of the MatSetValues() operation.
2503 
2504     You MUST have set the correct numbers of nonzeros per row in the call to
2505   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2506 
2507     MUST be called before any calls to MatSetValues();
2508 
2509 @*/
2510 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2511 {
2512   PetscErrorCode ierr;
2513 
2514   PetscFunctionBegin;
2515   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2516   PetscValidPointer(indices,2);
2517   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2523 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2524 {
2525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2526   PetscErrorCode ierr;
2527   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2528   PetscReal      atmp;
2529   PetscScalar    *x,zero = 0.0;
2530   MatScalar      *aa;
2531   PetscInt       ncols,brow,krow,kcol;
2532 
2533   PetscFunctionBegin;
2534   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2535   bs   = A->rmap->bs;
2536   aa   = a->a;
2537   ai   = a->i;
2538   aj   = a->j;
2539   mbs  = a->mbs;
2540 
2541   ierr = VecSet(v,zero);CHKERRQ(ierr);
2542   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2543   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2544   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2545   for (i=0; i<mbs; i++) {
2546     ncols = ai[1] - ai[0]; ai++;
2547     brow  = bs*i;
2548     for (j=0; j<ncols; j++){
2549       for (kcol=0; kcol<bs; kcol++){
2550         for (krow=0; krow<bs; krow++){
2551           atmp = PetscAbsScalar(*aa);aa++;
2552           row = brow + krow;    /* row index */
2553           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2554           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2555         }
2556       }
2557       aj++;
2558     }
2559   }
2560   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatCopy_SeqBAIJ"
2566 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2567 {
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   /* If the two matrices have the same copy implementation, use fast copy. */
2572   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2573     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2574     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2575     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2576 
2577     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2578     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2579     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2580   } else {
2581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2588 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2589 {
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2599 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2600 {
2601   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2602   PetscFunctionBegin;
2603   *array = a->a;
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2609 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2610 {
2611   PetscFunctionBegin;
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 #undef __FUNCT__
2616 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2617 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2618 {
2619   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2620   PetscErrorCode ierr;
2621   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2622   PetscBLASInt   one=1;
2623 
2624   PetscFunctionBegin;
2625   if (str == SAME_NONZERO_PATTERN) {
2626     PetscScalar alpha = a;
2627     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2628     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2629   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2630     if (y->xtoy && y->XtoY != X) {
2631       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2632       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2633     }
2634     if (!y->xtoy) { /* get xtoy */
2635       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2636       y->XtoY = X;
2637       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2638     }
2639     for (i=0; i<x->nz; i++) {
2640       j = 0;
2641       while (j < bs2){
2642         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2643         j++;
2644       }
2645     }
2646     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2647   } else {
2648     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2649   }
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2655 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2656 {
2657   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2658   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2659   MatScalar      *aa = a->a;
2660 
2661   PetscFunctionBegin;
2662   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2663   PetscFunctionReturn(0);
2664 }
2665 
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2668 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2669 {
2670   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2671   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2672   MatScalar      *aa = a->a;
2673 
2674   PetscFunctionBegin;
2675   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2683 /*
2684     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2685 */
2686 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2687 {
2688   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2689   PetscErrorCode ierr;
2690   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2691   PetscInt       nz = a->i[m],row,*jj,mr,col;
2692 
2693   PetscFunctionBegin;
2694   *nn = n;
2695   if (!ia) PetscFunctionReturn(0);
2696   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2697   else {
2698     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2699     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2700     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2701     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2702     jj = a->j;
2703     for (i=0; i<nz; i++) {
2704       collengths[jj[i]]++;
2705     }
2706     cia[0] = oshift;
2707     for (i=0; i<n; i++) {
2708       cia[i+1] = cia[i] + collengths[i];
2709     }
2710     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2711     jj   = a->j;
2712     for (row=0; row<m; row++) {
2713       mr = a->i[row+1] - a->i[row];
2714       for (i=0; i<mr; i++) {
2715         col = *jj++;
2716         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2717       }
2718     }
2719     ierr = PetscFree(collengths);CHKERRQ(ierr);
2720     *ia = cia; *ja = cja;
2721   }
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2727 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2728 {
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   if (!ia) PetscFunctionReturn(0);
2733   ierr = PetscFree(*ia);CHKERRQ(ierr);
2734   ierr = PetscFree(*ja);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2740 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2741 {
2742   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2743   PetscErrorCode ierr;
2744   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2745   PetscScalar    dx,*y,*xx,*w3_array;
2746   PetscScalar    *vscale_array;
2747   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2748   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2749   void           *fctx = coloring->fctx;
2750   PetscBool      flg = PETSC_FALSE;
2751   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2752   Vec            x1_tmp;
2753 
2754   PetscFunctionBegin;
2755   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2756   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2757   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2758   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2759 
2760   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2761   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2762   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2763   if (flg) {
2764     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2765   } else {
2766     PetscBool  assembled;
2767     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2768     if (assembled) {
2769       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2770     }
2771   }
2772 
2773   x1_tmp = x1;
2774   if (!coloring->vscale){
2775     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2776   }
2777 
2778   /*
2779     This is a horrible, horrible, hack.
2780   */
2781   if (coloring->F) {
2782     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2783     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2784     if (m1 != m2) {
2785       coloring->F = 0;
2786       }
2787     }
2788 
2789   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2790     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2791   }
2792   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2793 
2794   /* Set w1 = F(x1) */
2795   if (coloring->F) {
2796     w1          = coloring->F; /* use already computed value of function */
2797     coloring->F = 0;
2798   } else {
2799     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2800     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2801     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2802   }
2803 
2804   if (!coloring->w3) {
2805     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2806     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2807   }
2808   w3 = coloring->w3;
2809 
2810     CHKMEMQ;
2811     /* Compute all the local scale factors, including ghost points */
2812   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2813   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2814   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2815   if (ctype == IS_COLORING_GHOSTED){
2816     col_start = 0; col_end = N;
2817   } else if (ctype == IS_COLORING_GLOBAL){
2818     xx = xx - start;
2819     vscale_array = vscale_array - start;
2820     col_start = start; col_end = N + start;
2821   }    CHKMEMQ;
2822   for (col=col_start; col<col_end; col++){
2823     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2824     if (coloring->htype[0] == 'w') {
2825       dx = 1.0 + unorm;
2826     } else {
2827       dx  = xx[col];
2828     }
2829     if (dx == (PetscScalar)0.0) dx = 1.0;
2830 #if !defined(PETSC_USE_COMPLEX)
2831     if (dx < umin && dx >= 0.0)      dx = umin;
2832     else if (dx < 0.0 && dx > -umin) dx = -umin;
2833 #else
2834     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2835     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2836 #endif
2837     dx               *= epsilon;
2838     vscale_array[col] = (PetscScalar)1.0/dx;
2839   }     CHKMEMQ;
2840   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2841   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2842   if (ctype == IS_COLORING_GLOBAL){
2843     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2844     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2845   }
2846   CHKMEMQ;
2847   if (coloring->vscaleforrow) {
2848     vscaleforrow = coloring->vscaleforrow;
2849   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2850 
2851   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2852   /*
2853     Loop over each color
2854   */
2855   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2856   for (k=0; k<coloring->ncolors; k++) {
2857     coloring->currentcolor = k;
2858     for (i=0; i<bs; i++) {
2859       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2860       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2861       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2862       /*
2863 	Loop over each column associated with color
2864 	adding the perturbation to the vector w3.
2865       */
2866       for (l=0; l<coloring->ncolumns[k]; l++) {
2867 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2868 	if (coloring->htype[0] == 'w') {
2869 	  dx = 1.0 + unorm;
2870 	} else {
2871 	  dx  = xx[col];
2872 	}
2873 	if (dx == (PetscScalar)0.0) dx = 1.0;
2874 #if !defined(PETSC_USE_COMPLEX)
2875 	if (dx < umin && dx >= 0.0)      dx = umin;
2876 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2877 #else
2878 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2879 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2880 #endif
2881 	dx            *= epsilon;
2882 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2883 	w3_array[col] += dx;
2884       }
2885       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2886       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2887 
2888       /*
2889 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2890 	w2 = F(x1 + dx) - F(x1)
2891       */
2892       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2893       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2894       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2895       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2896 
2897       /*
2898 	Loop over rows of vector, putting results into Jacobian matrix
2899       */
2900       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2901       for (l=0; l<coloring->nrows[k]; l++) {
2902 	row    = bs*coloring->rows[k][l];             /* local row index */
2903 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2904         for (j=0; j<bs; j++) {
2905   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2906           srows[j]  = row + start + j;
2907         }
2908 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2909       }
2910       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2911     }
2912   } /* endof for each color */
2913   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2914   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2915   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2916   ierr = PetscFree(srows);CHKERRQ(ierr);
2917 
2918   coloring->currentcolor = -1;
2919   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2920   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2921   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 /* -------------------------------------------------------------------*/
2926 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2927        MatGetRow_SeqBAIJ,
2928        MatRestoreRow_SeqBAIJ,
2929        MatMult_SeqBAIJ_N,
2930 /* 4*/ MatMultAdd_SeqBAIJ_N,
2931        MatMultTranspose_SeqBAIJ,
2932        MatMultTransposeAdd_SeqBAIJ,
2933        0,
2934        0,
2935        0,
2936 /*10*/ 0,
2937        MatLUFactor_SeqBAIJ,
2938        0,
2939        0,
2940        MatTranspose_SeqBAIJ,
2941 /*15*/ MatGetInfo_SeqBAIJ,
2942        MatEqual_SeqBAIJ,
2943        MatGetDiagonal_SeqBAIJ,
2944        MatDiagonalScale_SeqBAIJ,
2945        MatNorm_SeqBAIJ,
2946 /*20*/ 0,
2947        MatAssemblyEnd_SeqBAIJ,
2948        MatSetOption_SeqBAIJ,
2949        MatZeroEntries_SeqBAIJ,
2950 /*24*/ MatZeroRows_SeqBAIJ,
2951        0,
2952        0,
2953        0,
2954        0,
2955 /*29*/ MatSetUp_SeqBAIJ,
2956        0,
2957        0,
2958        MatGetArray_SeqBAIJ,
2959        MatRestoreArray_SeqBAIJ,
2960 /*34*/ MatDuplicate_SeqBAIJ,
2961        0,
2962        0,
2963        MatILUFactor_SeqBAIJ,
2964        0,
2965 /*39*/ MatAXPY_SeqBAIJ,
2966        MatGetSubMatrices_SeqBAIJ,
2967        MatIncreaseOverlap_SeqBAIJ,
2968        MatGetValues_SeqBAIJ,
2969        MatCopy_SeqBAIJ,
2970 /*44*/ 0,
2971        MatScale_SeqBAIJ,
2972        0,
2973        0,
2974        MatZeroRowsColumns_SeqBAIJ,
2975 /*49*/ 0,
2976        MatGetRowIJ_SeqBAIJ,
2977        MatRestoreRowIJ_SeqBAIJ,
2978        MatGetColumnIJ_SeqBAIJ,
2979        MatRestoreColumnIJ_SeqBAIJ,
2980 /*54*/ MatFDColoringCreate_SeqAIJ,
2981        0,
2982        0,
2983        0,
2984        MatSetValuesBlocked_SeqBAIJ,
2985 /*59*/ MatGetSubMatrix_SeqBAIJ,
2986        MatDestroy_SeqBAIJ,
2987        MatView_SeqBAIJ,
2988        0,
2989        0,
2990 /*64*/ 0,
2991        0,
2992        0,
2993        0,
2994        0,
2995 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2996        0,
2997        MatConvert_Basic,
2998        0,
2999        0,
3000 /*74*/ 0,
3001        MatFDColoringApply_BAIJ,
3002        0,
3003        0,
3004        0,
3005 /*79*/ 0,
3006        0,
3007        0,
3008        0,
3009        MatLoad_SeqBAIJ,
3010 /*84*/ 0,
3011        0,
3012        0,
3013        0,
3014        0,
3015 /*89*/ 0,
3016        0,
3017        0,
3018        0,
3019        0,
3020 /*94*/ 0,
3021        0,
3022        0,
3023        0,
3024        0,
3025 /*99*/0,
3026        0,
3027        0,
3028        0,
3029        0,
3030 /*104*/0,
3031        MatRealPart_SeqBAIJ,
3032        MatImaginaryPart_SeqBAIJ,
3033        0,
3034        0,
3035 /*109*/0,
3036        0,
3037        0,
3038        0,
3039        MatMissingDiagonal_SeqBAIJ,
3040 /*114*/0,
3041        0,
3042        0,
3043        0,
3044        0,
3045 /*119*/0,
3046        0,
3047        MatMultHermitianTranspose_SeqBAIJ,
3048        MatMultHermitianTransposeAdd_SeqBAIJ,
3049        0,
3050 /*124*/0,
3051        0,
3052        MatInvertBlockDiagonal_SeqBAIJ
3053 };
3054 
3055 EXTERN_C_BEGIN
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3058 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3059 {
3060   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3061   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3062   PetscErrorCode ierr;
3063 
3064   PetscFunctionBegin;
3065   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3066 
3067   /* allocate space for values if not already there */
3068   if (!aij->saved_values) {
3069     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3070     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3071   }
3072 
3073   /* copy values over */
3074   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3075   PetscFunctionReturn(0);
3076 }
3077 EXTERN_C_END
3078 
3079 EXTERN_C_BEGIN
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3082 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3083 {
3084   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3085   PetscErrorCode ierr;
3086   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3087 
3088   PetscFunctionBegin;
3089   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3090   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3091 
3092   /* copy values over */
3093   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3094   PetscFunctionReturn(0);
3095 }
3096 EXTERN_C_END
3097 
3098 EXTERN_C_BEGIN
3099 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3100 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3101 EXTERN_C_END
3102 
3103 EXTERN_C_BEGIN
3104 #undef __FUNCT__
3105 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3106 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3107 {
3108   Mat_SeqBAIJ    *b;
3109   PetscErrorCode ierr;
3110   PetscInt       i,mbs,nbs,bs2;
3111   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3112 
3113   PetscFunctionBegin;
3114   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3115   if (nz == MAT_SKIP_ALLOCATION) {
3116     skipallocation = PETSC_TRUE;
3117     nz             = 0;
3118   }
3119 
3120   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3121   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3122   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3123   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3124   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3125 
3126   B->preallocated = PETSC_TRUE;
3127 
3128   mbs  = B->rmap->n/bs;
3129   nbs  = B->cmap->n/bs;
3130   bs2  = bs*bs;
3131 
3132   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3133 
3134   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3135   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3136   if (nnz) {
3137     for (i=0; i<mbs; i++) {
3138       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3139       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3140     }
3141   }
3142 
3143   b       = (Mat_SeqBAIJ*)B->data;
3144   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3145     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3146   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3147 
3148   if (!flg) {
3149     switch (bs) {
3150     case 1:
3151       B->ops->mult            = MatMult_SeqBAIJ_1;
3152       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3153       B->ops->sor             = MatSOR_SeqBAIJ_1;
3154       break;
3155     case 2:
3156       B->ops->mult            = MatMult_SeqBAIJ_2;
3157       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3158       B->ops->sor             = MatSOR_SeqBAIJ_2;
3159       break;
3160     case 3:
3161       B->ops->mult            = MatMult_SeqBAIJ_3;
3162       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3163       B->ops->sor             = MatSOR_SeqBAIJ_3;
3164       break;
3165     case 4:
3166       B->ops->mult            = MatMult_SeqBAIJ_4;
3167       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3168       B->ops->sor             = MatSOR_SeqBAIJ_4;
3169       break;
3170     case 5:
3171       B->ops->mult            = MatMult_SeqBAIJ_5;
3172       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3173       B->ops->sor             = MatSOR_SeqBAIJ_5;
3174       break;
3175     case 6:
3176       B->ops->mult            = MatMult_SeqBAIJ_6;
3177       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3178       B->ops->sor             = MatSOR_SeqBAIJ_6;
3179       break;
3180     case 7:
3181       B->ops->mult            = MatMult_SeqBAIJ_7;
3182       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3183       B->ops->sor             = MatSOR_SeqBAIJ_7;
3184       break;
3185     case 15:
3186       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3187       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3188       B->ops->sor             = MatSOR_SeqBAIJ_N;
3189       break;
3190     default:
3191       B->ops->mult            = MatMult_SeqBAIJ_N;
3192       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3193       B->ops->sor             = MatSOR_SeqBAIJ_N;
3194       break;
3195     }
3196   }
3197   b->mbs       = mbs;
3198   b->nbs       = nbs;
3199   if (!skipallocation) {
3200     if (!b->imax) {
3201       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3202       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3203       b->free_imax_ilen = PETSC_TRUE;
3204     }
3205     /* b->ilen will count nonzeros in each block row so far. */
3206     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3207     if (!nnz) {
3208       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3209       else if (nz < 0) nz = 1;
3210       for (i=0; i<mbs; i++) b->imax[i] = nz;
3211       nz = nz*mbs;
3212     } else {
3213       nz = 0;
3214       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3215     }
3216 
3217     /* allocate the matrix space */
3218     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3219     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3220     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3221     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3222     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3223     b->singlemalloc = PETSC_TRUE;
3224     b->i[0] = 0;
3225     for (i=1; i<mbs+1; i++) {
3226       b->i[i] = b->i[i-1] + b->imax[i-1];
3227     }
3228     b->free_a     = PETSC_TRUE;
3229     b->free_ij    = PETSC_TRUE;
3230   } else {
3231     b->free_a     = PETSC_FALSE;
3232     b->free_ij    = PETSC_FALSE;
3233   }
3234 
3235   b->bs2              = bs2;
3236   b->mbs              = mbs;
3237   b->nz               = 0;
3238   b->maxnz            = nz;
3239   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3240   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3241   PetscFunctionReturn(0);
3242 }
3243 EXTERN_C_END
3244 
3245 EXTERN_C_BEGIN
3246 #undef __FUNCT__
3247 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3248 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3249 {
3250   PetscInt       i,m,nz,nz_max=0,*nnz;
3251   PetscScalar    *values=0;
3252   PetscErrorCode ierr;
3253 
3254   PetscFunctionBegin;
3255   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3256   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3257   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3258   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3259   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3260   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3261   m = B->rmap->n/bs;
3262 
3263   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3264   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3265   for(i=0; i<m; i++) {
3266     nz = ii[i+1]- ii[i];
3267     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3268     nz_max = PetscMax(nz_max, nz);
3269     nnz[i] = nz;
3270   }
3271   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3272   ierr = PetscFree(nnz);CHKERRQ(ierr);
3273 
3274   values = (PetscScalar*)V;
3275   if (!values) {
3276     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3277     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3278   }
3279   for (i=0; i<m; i++) {
3280     PetscInt          ncols  = ii[i+1] - ii[i];
3281     const PetscInt    *icols = jj + ii[i];
3282     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3283     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3284   }
3285   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3286   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3287   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3288   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291 EXTERN_C_END
3292 
3293 
3294 EXTERN_C_BEGIN
3295 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3296 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3297 #if defined(PETSC_HAVE_MUMPS)
3298 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3299 #endif
3300 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3301 EXTERN_C_END
3302 
3303 /*MC
3304    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3305    block sparse compressed row format.
3306 
3307    Options Database Keys:
3308 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3309 
3310   Level: beginner
3311 
3312 .seealso: MatCreateSeqBAIJ()
3313 M*/
3314 
3315 EXTERN_C_BEGIN
3316 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3317 EXTERN_C_END
3318 
3319 EXTERN_C_BEGIN
3320 #undef __FUNCT__
3321 #define __FUNCT__ "MatCreate_SeqBAIJ"
3322 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3323 {
3324   PetscErrorCode ierr;
3325   PetscMPIInt    size;
3326   Mat_SeqBAIJ    *b;
3327 
3328   PetscFunctionBegin;
3329   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3330   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3331 
3332   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3333   B->data = (void*)b;
3334   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3335   b->row                   = 0;
3336   b->col                   = 0;
3337   b->icol                  = 0;
3338   b->reallocs              = 0;
3339   b->saved_values          = 0;
3340 
3341   b->roworiented           = PETSC_TRUE;
3342   b->nonew                 = 0;
3343   b->diag                  = 0;
3344   b->solve_work            = 0;
3345   b->mult_work             = 0;
3346   B->spptr                 = 0;
3347   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3348   b->keepnonzeropattern    = PETSC_FALSE;
3349   b->xtoy                  = 0;
3350   b->XtoY                  = 0;
3351   B->same_nonzero          = PETSC_FALSE;
3352 
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3354                                      "MatGetFactorAvailable_seqbaij_petsc",
3355                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3357                                      "MatGetFactor_seqbaij_petsc",
3358                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3360                                      "MatGetFactor_seqbaij_bstrm",
3361                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3362 #if defined(PETSC_HAVE_MUMPS)
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3364 #endif
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3366                                      "MatInvertBlockDiagonal_SeqBAIJ",
3367                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3369                                      "MatStoreValues_SeqBAIJ",
3370                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3372                                      "MatRetrieveValues_SeqBAIJ",
3373                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3375                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3376                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3378                                      "MatConvert_SeqBAIJ_SeqAIJ",
3379                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3381                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3382                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3383   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3384                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3385                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3386   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3387                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3388                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3389   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3390                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3391                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3392   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3393                                      "MatIsTranspose_SeqBAIJ",
3394                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3395   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3396   PetscFunctionReturn(0);
3397 }
3398 EXTERN_C_END
3399 
3400 #undef __FUNCT__
3401 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3402 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3403 {
3404   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3405   PetscErrorCode ierr;
3406   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3407 
3408   PetscFunctionBegin;
3409   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3410 
3411   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3412     c->imax = a->imax;
3413     c->ilen = a->ilen;
3414     c->free_imax_ilen = PETSC_FALSE;
3415   } else {
3416     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3417     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3418     for (i=0; i<mbs; i++) {
3419       c->imax[i] = a->imax[i];
3420       c->ilen[i] = a->ilen[i];
3421     }
3422     c->free_imax_ilen = PETSC_TRUE;
3423   }
3424 
3425   /* allocate the matrix space */
3426   if (mallocmatspace){
3427     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3428       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3429       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3430       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3431       c->i            = a->i;
3432       c->j            = a->j;
3433       c->singlemalloc = PETSC_FALSE;
3434       c->free_a       = PETSC_TRUE;
3435       c->free_ij      = PETSC_FALSE;
3436       c->parent       = A;
3437       C->preallocated = PETSC_TRUE;
3438       C->assembled    = PETSC_TRUE;
3439       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3440       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3441       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3442     } else {
3443       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3444       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3445       c->singlemalloc = PETSC_TRUE;
3446       c->free_a       = PETSC_TRUE;
3447       c->free_ij      = PETSC_TRUE;
3448       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3449       if (mbs > 0) {
3450 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3451 	if (cpvalues == MAT_COPY_VALUES) {
3452 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3453 	} else {
3454 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3455 	}
3456       }
3457       C->preallocated = PETSC_TRUE;
3458       C->assembled    = PETSC_TRUE;
3459     }
3460   }
3461 
3462   c->roworiented = a->roworiented;
3463   c->nonew       = a->nonew;
3464   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3465   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3466   c->bs2         = a->bs2;
3467   c->mbs         = a->mbs;
3468   c->nbs         = a->nbs;
3469 
3470   if (a->diag) {
3471     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3472       c->diag      = a->diag;
3473       c->free_diag = PETSC_FALSE;
3474     } else {
3475       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3476       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3477       for (i=0; i<mbs; i++) {
3478         c->diag[i] = a->diag[i];
3479       }
3480       c->free_diag = PETSC_TRUE;
3481     }
3482   } else c->diag        = 0;
3483   c->nz                 = a->nz;
3484   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3485   c->solve_work         = 0;
3486   c->mult_work          = 0;
3487 
3488   c->compressedrow.use     = a->compressedrow.use;
3489   c->compressedrow.nrows   = a->compressedrow.nrows;
3490   c->compressedrow.check   = a->compressedrow.check;
3491   if (a->compressedrow.use){
3492     i = a->compressedrow.nrows;
3493     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3494     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3495     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3496     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3497   } else {
3498     c->compressedrow.use    = PETSC_FALSE;
3499     c->compressedrow.i      = PETSC_NULL;
3500     c->compressedrow.rindex = PETSC_NULL;
3501   }
3502   C->same_nonzero = A->same_nonzero;
3503   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3504   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3510 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3511 {
3512     PetscErrorCode ierr;
3513 
3514   PetscFunctionBegin;
3515   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3516   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3517   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3518   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 #undef __FUNCT__
3523 #define __FUNCT__ "MatLoad_SeqBAIJ"
3524 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3525 {
3526   Mat_SeqBAIJ    *a;
3527   PetscErrorCode ierr;
3528   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3529   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3530   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3531   PetscInt       *masked,nmask,tmp,bs2,ishift;
3532   PetscMPIInt    size;
3533   int            fd;
3534   PetscScalar    *aa;
3535   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3536 
3537   PetscFunctionBegin;
3538   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3539   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3540   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3541   bs2  = bs*bs;
3542 
3543   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3544   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3545   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3546   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3547   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3548   M = header[1]; N = header[2]; nz = header[3];
3549 
3550   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3551   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3552 
3553   /*
3554      This code adds extra rows to make sure the number of rows is
3555     divisible by the blocksize
3556   */
3557   mbs        = M/bs;
3558   extra_rows = bs - M + bs*(mbs);
3559   if (extra_rows == bs) extra_rows = 0;
3560   else                  mbs++;
3561   if (extra_rows) {
3562     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3563   }
3564 
3565   /* Set global sizes if not already set */
3566   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3567     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3568   } else { /* Check if the matrix global sizes are correct */
3569     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3570     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3571       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3572     }
3573     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3574   }
3575 
3576   /* read in row lengths */
3577   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3578   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3579   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3580 
3581   /* read in column indices */
3582   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3583   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3584   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3585 
3586   /* loop over row lengths determining block row lengths */
3587   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3588   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3589   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3590   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3591   rowcount = 0;
3592   nzcount = 0;
3593   for (i=0; i<mbs; i++) {
3594     nmask = 0;
3595     for (j=0; j<bs; j++) {
3596       kmax = rowlengths[rowcount];
3597       for (k=0; k<kmax; k++) {
3598         tmp = jj[nzcount++]/bs;
3599         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3600       }
3601       rowcount++;
3602     }
3603     browlengths[i] += nmask;
3604     /* zero out the mask elements we set */
3605     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3606   }
3607 
3608   /* Do preallocation  */
3609   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3610   a = (Mat_SeqBAIJ*)newmat->data;
3611 
3612   /* set matrix "i" values */
3613   a->i[0] = 0;
3614   for (i=1; i<= mbs; i++) {
3615     a->i[i]      = a->i[i-1] + browlengths[i-1];
3616     a->ilen[i-1] = browlengths[i-1];
3617   }
3618   a->nz         = 0;
3619   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3620 
3621   /* read in nonzero values */
3622   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3623   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3624   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3625 
3626   /* set "a" and "j" values into matrix */
3627   nzcount = 0; jcount = 0;
3628   for (i=0; i<mbs; i++) {
3629     nzcountb = nzcount;
3630     nmask    = 0;
3631     for (j=0; j<bs; j++) {
3632       kmax = rowlengths[i*bs+j];
3633       for (k=0; k<kmax; k++) {
3634         tmp = jj[nzcount++]/bs;
3635 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3636       }
3637     }
3638     /* sort the masked values */
3639     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3640 
3641     /* set "j" values into matrix */
3642     maskcount = 1;
3643     for (j=0; j<nmask; j++) {
3644       a->j[jcount++]  = masked[j];
3645       mask[masked[j]] = maskcount++;
3646     }
3647     /* set "a" values into matrix */
3648     ishift = bs2*a->i[i];
3649     for (j=0; j<bs; j++) {
3650       kmax = rowlengths[i*bs+j];
3651       for (k=0; k<kmax; k++) {
3652         tmp       = jj[nzcountb]/bs ;
3653         block     = mask[tmp] - 1;
3654         point     = jj[nzcountb] - bs*tmp;
3655         idx       = ishift + bs2*block + j + bs*point;
3656         a->a[idx] = (MatScalar)aa[nzcountb++];
3657       }
3658     }
3659     /* zero out the mask elements we set */
3660     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3661   }
3662   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3663 
3664   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3665   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3666   ierr = PetscFree(aa);CHKERRQ(ierr);
3667   ierr = PetscFree(jj);CHKERRQ(ierr);
3668   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3669 
3670   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3671   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3672   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3673   PetscFunctionReturn(0);
3674 }
3675 
3676 #undef __FUNCT__
3677 #define __FUNCT__ "MatCreateSeqBAIJ"
3678 /*@C
3679    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3680    compressed row) format.  For good matrix assembly performance the
3681    user should preallocate the matrix storage by setting the parameter nz
3682    (or the array nnz).  By setting these parameters accurately, performance
3683    during matrix assembly can be increased by more than a factor of 50.
3684 
3685    Collective on MPI_Comm
3686 
3687    Input Parameters:
3688 +  comm - MPI communicator, set to PETSC_COMM_SELF
3689 .  bs - size of block
3690 .  m - number of rows
3691 .  n - number of columns
3692 .  nz - number of nonzero blocks  per block row (same for all rows)
3693 -  nnz - array containing the number of nonzero blocks in the various block rows
3694          (possibly different for each block row) or PETSC_NULL
3695 
3696    Output Parameter:
3697 .  A - the matrix
3698 
3699    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3700    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3701    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3702 
3703    Options Database Keys:
3704 .   -mat_no_unroll - uses code that does not unroll the loops in the
3705                      block calculations (much slower)
3706 .    -mat_block_size - size of the blocks to use
3707 
3708    Level: intermediate
3709 
3710    Notes:
3711    The number of rows and columns must be divisible by blocksize.
3712 
3713    If the nnz parameter is given then the nz parameter is ignored
3714 
3715    A nonzero block is any block that as 1 or more nonzeros in it
3716 
3717    The block AIJ format is fully compatible with standard Fortran 77
3718    storage.  That is, the stored row and column indices can begin at
3719    either one (as in Fortran) or zero.  See the users' manual for details.
3720 
3721    Specify the preallocated storage with either nz or nnz (not both).
3722    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3723    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3724    matrices.
3725 
3726 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3727 @*/
3728 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3729 {
3730   PetscErrorCode ierr;
3731 
3732   PetscFunctionBegin;
3733   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3734   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3735   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3736   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3737   PetscFunctionReturn(0);
3738 }
3739 
3740 #undef __FUNCT__
3741 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3742 /*@C
3743    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3744    per row in the matrix. For good matrix assembly performance the
3745    user should preallocate the matrix storage by setting the parameter nz
3746    (or the array nnz).  By setting these parameters accurately, performance
3747    during matrix assembly can be increased by more than a factor of 50.
3748 
3749    Collective on MPI_Comm
3750 
3751    Input Parameters:
3752 +  A - the matrix
3753 .  bs - size of block
3754 .  nz - number of block nonzeros per block row (same for all rows)
3755 -  nnz - array containing the number of block nonzeros in the various block rows
3756          (possibly different for each block row) or PETSC_NULL
3757 
3758    Options Database Keys:
3759 .   -mat_no_unroll - uses code that does not unroll the loops in the
3760                      block calculations (much slower)
3761 .    -mat_block_size - size of the blocks to use
3762 
3763    Level: intermediate
3764 
3765    Notes:
3766    If the nnz parameter is given then the nz parameter is ignored
3767 
3768    You can call MatGetInfo() to get information on how effective the preallocation was;
3769    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3770    You can also run with the option -info and look for messages with the string
3771    malloc in them to see if additional memory allocation was needed.
3772 
3773    The block AIJ format is fully compatible with standard Fortran 77
3774    storage.  That is, the stored row and column indices can begin at
3775    either one (as in Fortran) or zero.  See the users' manual for details.
3776 
3777    Specify the preallocated storage with either nz or nnz (not both).
3778    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3779    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3780 
3781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3782 @*/
3783 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3784 {
3785   PetscErrorCode ierr;
3786 
3787   PetscFunctionBegin;
3788   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3789   PetscValidType(B,1);
3790   PetscValidLogicalCollectiveInt(B,bs,2);
3791   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3792   PetscFunctionReturn(0);
3793 }
3794 
3795 #undef __FUNCT__
3796 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3797 /*@C
3798    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3799    (the default sequential PETSc format).
3800 
3801    Collective on MPI_Comm
3802 
3803    Input Parameters:
3804 +  A - the matrix
3805 .  i - the indices into j for the start of each local row (starts with zero)
3806 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3807 -  v - optional values in the matrix
3808 
3809    Level: developer
3810 
3811 .keywords: matrix, aij, compressed row, sparse
3812 
3813 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3814 @*/
3815 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3816 {
3817   PetscErrorCode ierr;
3818 
3819   PetscFunctionBegin;
3820   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3821   PetscValidType(B,1);
3822   PetscValidLogicalCollectiveInt(B,bs,2);
3823   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3824   PetscFunctionReturn(0);
3825 }
3826 
3827 
3828 #undef __FUNCT__
3829 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3830 /*@
3831      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3832 
3833      Collective on MPI_Comm
3834 
3835    Input Parameters:
3836 +  comm - must be an MPI communicator of size 1
3837 .  bs - size of block
3838 .  m - number of rows
3839 .  n - number of columns
3840 .  i - row indices
3841 .  j - column indices
3842 -  a - matrix values
3843 
3844    Output Parameter:
3845 .  mat - the matrix
3846 
3847    Level: advanced
3848 
3849    Notes:
3850        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3851     once the matrix is destroyed
3852 
3853        You cannot set new nonzero locations into this matrix, that will generate an error.
3854 
3855        The i and j indices are 0 based
3856 
3857        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3858 
3859 
3860 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3861 
3862 @*/
3863 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3864 {
3865   PetscErrorCode ierr;
3866   PetscInt       ii;
3867   Mat_SeqBAIJ    *baij;
3868 
3869   PetscFunctionBegin;
3870   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3871   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3872 
3873   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3874   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3875   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3876   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3877   baij = (Mat_SeqBAIJ*)(*mat)->data;
3878   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3879   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3880 
3881   baij->i = i;
3882   baij->j = j;
3883   baij->a = a;
3884   baij->singlemalloc = PETSC_FALSE;
3885   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3886   baij->free_a       = PETSC_FALSE;
3887   baij->free_ij      = PETSC_FALSE;
3888 
3889   for (ii=0; ii<m; ii++) {
3890     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3891 #if defined(PETSC_USE_DEBUG)
3892     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3893 #endif
3894   }
3895 #if defined(PETSC_USE_DEBUG)
3896   for (ii=0; ii<baij->i[m]; ii++) {
3897     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3898     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3899   }
3900 #endif
3901 
3902   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3903   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3904   PetscFunctionReturn(0);
3905 }
3906