xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 3bf036e263fd78807e2931ff42d9ddcd8aae3fd4)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1305 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1310   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1311 
1312   PetscFunctionBegin;
1313   *nn = n;
1314   if (!ia) PetscFunctionReturn(0);
1315   if (symmetric) {
1316     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1317     nz   = tia[n];
1318   } else {
1319     tia = a->i; tja = a->j;
1320   }
1321 
1322   if (!blockcompressed && bs > 1) {
1323     (*nn) *= bs;
1324     /* malloc & create the natural set of indices */
1325     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1326     if (n) {
1327       (*ia)[0] = 0;
1328       for (j=1; j<bs; j++) {
1329         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1330       }
1331     }
1332 
1333     for (i=1; i<n; i++) {
1334       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1335       for (j=1; j<bs; j++) {
1336         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1337       }
1338     }
1339     if (n) {
1340       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1341     }
1342 
1343     if (inja) {
1344       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1345       cnt = 0;
1346       for (i=0; i<n; i++) {
1347         for (j=0; j<bs; j++) {
1348           for (k=tia[i]; k<tia[i+1]; k++) {
1349             for (l=0; l<bs; l++) {
1350               (*ja)[cnt++] = bs*tja[k] + l;
1351 	    }
1352           }
1353         }
1354       }
1355     }
1356 
1357     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1358       ierr = PetscFree(tia);CHKERRQ(ierr);
1359       ierr = PetscFree(tja);CHKERRQ(ierr);
1360     }
1361   } else if (oshift == 1) {
1362     if (symmetric) {
1363       nz = tia[A->rmap->n/bs];
1364       /*  add 1 to i and j indices */
1365       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1366       *ia = tia;
1367       if (ja) {
1368 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1369         *ja = tja;
1370       }
1371     } else {
1372       nz = a->i[A->rmap->n/bs];
1373       /* malloc space and  add 1 to i and j indices */
1374       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1375       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1376       if (ja) {
1377 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1378 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1379       }
1380     }
1381   } else {
1382     *ia = tia;
1383     if (ja) *ja = tja;
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1390 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1391 {
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   if (!ia) PetscFunctionReturn(0);
1396   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1397     ierr = PetscFree(*ia);CHKERRQ(ierr);
1398     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1405 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1406 {
1407   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411 #if defined(PETSC_USE_LOG)
1412   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1413 #endif
1414   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1415   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1416   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1417   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1418   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1419   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1420   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1421   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1422   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1423   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1424   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1425   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1426   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1427 
1428   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1429   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1430   ierr = PetscFree(A->data);CHKERRQ(ierr);
1431 
1432   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1448 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1449 {
1450   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   switch (op) {
1455   case MAT_ROW_ORIENTED:
1456     a->roworiented    = flg;
1457     break;
1458   case MAT_KEEP_NONZERO_PATTERN:
1459     a->keepnonzeropattern = flg;
1460     break;
1461   case MAT_NEW_NONZERO_LOCATIONS:
1462     a->nonew          = (flg ? 0 : 1);
1463     break;
1464   case MAT_NEW_NONZERO_LOCATION_ERR:
1465     a->nonew          = (flg ? -1 : 0);
1466     break;
1467   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1468     a->nonew          = (flg ? -2 : 0);
1469     break;
1470   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1471     a->nounused       = (flg ? -1 : 0);
1472     break;
1473   case MAT_CHECK_COMPRESSED_ROW:
1474     a->compressedrow.check = flg;
1475     break;
1476   case MAT_NEW_DIAGONALS:
1477   case MAT_IGNORE_OFF_PROC_ENTRIES:
1478   case MAT_USE_HASH_TABLE:
1479     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1480     break;
1481   case MAT_SPD:
1482   case MAT_SYMMETRIC:
1483   case MAT_STRUCTURALLY_SYMMETRIC:
1484   case MAT_HERMITIAN:
1485   case MAT_SYMMETRY_ETERNAL:
1486     /* These options are handled directly by MatSetOption() */
1487     break;
1488   default:
1489     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1496 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1497 {
1498   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1499   PetscErrorCode ierr;
1500   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1501   MatScalar      *aa,*aa_i;
1502   PetscScalar    *v_i;
1503 
1504   PetscFunctionBegin;
1505   bs  = A->rmap->bs;
1506   ai  = a->i;
1507   aj  = a->j;
1508   aa  = a->a;
1509   bs2 = a->bs2;
1510 
1511   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1512 
1513   bn  = row/bs;   /* Block number */
1514   bp  = row % bs; /* Block Position */
1515   M   = ai[bn+1] - ai[bn];
1516   *nz = bs*M;
1517 
1518   if (v) {
1519     *v = 0;
1520     if (*nz) {
1521       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1522       for (i=0; i<M; i++) { /* for each block in the block row */
1523         v_i  = *v + i*bs;
1524         aa_i = aa + bs2*(ai[bn] + i);
1525         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1526       }
1527     }
1528   }
1529 
1530   if (idx) {
1531     *idx = 0;
1532     if (*nz) {
1533       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1534       for (i=0; i<M; i++) { /* for each block in the block row */
1535         idx_i = *idx + i*bs;
1536         itmp  = bs*aj[ai[bn] + i];
1537         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1538       }
1539     }
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1546 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1547 {
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1552   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1560 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1561 {
1562   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1563   Mat            C;
1564   PetscErrorCode ierr;
1565   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1566   PetscInt       *rows,*cols,bs2=a->bs2;
1567   MatScalar      *array;
1568 
1569   PetscFunctionBegin;
1570   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1571   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1572     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1573     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1574 
1575     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1576     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1577     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1578     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1579     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1580     ierr = PetscFree(col);CHKERRQ(ierr);
1581   } else {
1582     C = *B;
1583   }
1584 
1585   array = a->a;
1586   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1587   for (i=0; i<mbs; i++) {
1588     cols[0] = i*bs;
1589     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1590     len = ai[i+1] - ai[i];
1591     for (j=0; j<len; j++) {
1592       rows[0] = (*aj++)*bs;
1593       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1594       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1595       array += bs2;
1596     }
1597   }
1598   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1599 
1600   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602 
1603   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1604     *B = C;
1605   } else {
1606     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 EXTERN_C_BEGIN
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1614 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1615 {
1616   PetscErrorCode ierr;
1617   Mat            Btrans;
1618 
1619   PetscFunctionBegin;
1620   *f = PETSC_FALSE;
1621   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1622   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1623   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1624   PetscFunctionReturn(0);
1625 }
1626 EXTERN_C_END
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1630 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1631 {
1632   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1633   PetscErrorCode ierr;
1634   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1635   int            fd;
1636   PetscScalar    *aa;
1637   FILE           *file;
1638 
1639   PetscFunctionBegin;
1640   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1641   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1642   col_lens[0] = MAT_FILE_CLASSID;
1643 
1644   col_lens[1] = A->rmap->N;
1645   col_lens[2] = A->cmap->n;
1646   col_lens[3] = a->nz*bs2;
1647 
1648   /* store lengths of each row and write (including header) to file */
1649   count = 0;
1650   for (i=0; i<a->mbs; i++) {
1651     for (j=0; j<bs; j++) {
1652       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1653     }
1654   }
1655   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1656   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1657 
1658   /* store column indices (zero start index) */
1659   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1660   count = 0;
1661   for (i=0; i<a->mbs; i++) {
1662     for (j=0; j<bs; j++) {
1663       for (k=a->i[i]; k<a->i[i+1]; k++) {
1664         for (l=0; l<bs; l++) {
1665           jj[count++] = bs*a->j[k] + l;
1666         }
1667       }
1668     }
1669   }
1670   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1671   ierr = PetscFree(jj);CHKERRQ(ierr);
1672 
1673   /* store nonzero values */
1674   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1675   count = 0;
1676   for (i=0; i<a->mbs; i++) {
1677     for (j=0; j<bs; j++) {
1678       for (k=a->i[i]; k<a->i[i+1]; k++) {
1679         for (l=0; l<bs; l++) {
1680           aa[count++] = a->a[bs2*k + l*bs + j];
1681         }
1682       }
1683     }
1684   }
1685   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1686   ierr = PetscFree(aa);CHKERRQ(ierr);
1687 
1688   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1689   if (file) {
1690     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1697 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1698 {
1699   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1700   PetscErrorCode    ierr;
1701   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1702   PetscViewerFormat format;
1703 
1704   PetscFunctionBegin;
1705   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1706   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1707     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1708   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1709     Mat aij;
1710     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1711     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1712     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1713   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1714      PetscFunctionReturn(0);
1715   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1716     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1717     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1718     for (i=0; i<a->mbs; i++) {
1719       for (j=0; j<bs; j++) {
1720         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1721         for (k=a->i[i]; k<a->i[i+1]; k++) {
1722           for (l=0; l<bs; l++) {
1723 #if defined(PETSC_USE_COMPLEX)
1724             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1725               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1726                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1727             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1728               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1729                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1730             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1731               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             }
1733 #else
1734             if (a->a[bs2*k + l*bs + j] != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1736             }
1737 #endif
1738           }
1739         }
1740         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1741       }
1742     }
1743     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1744   } else {
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1746     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1747     for (i=0; i<a->mbs; i++) {
1748       for (j=0; j<bs; j++) {
1749         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1750         for (k=a->i[i]; k<a->i[i+1]; k++) {
1751           for (l=0; l<bs; l++) {
1752 #if defined(PETSC_USE_COMPLEX)
1753             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1754               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1755                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1756             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1757               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1758                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1759             } else {
1760               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             }
1762 #else
1763             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1764 #endif
1765           }
1766         }
1767         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1768       }
1769     }
1770     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1771   }
1772   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1778 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1779 {
1780   Mat            A = (Mat) Aa;
1781   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1782   PetscErrorCode ierr;
1783   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1784   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1785   MatScalar      *aa;
1786   PetscViewer    viewer;
1787   PetscViewerFormat format;
1788 
1789   PetscFunctionBegin;
1790   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1791   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1792 
1793   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1794 
1795   /* loop over matrix elements drawing boxes */
1796 
1797   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1798     color = PETSC_DRAW_BLUE;
1799     for (i=0,row=0; i<mbs; i++,row+=bs) {
1800       for (j=a->i[i]; j<a->i[i+1]; j++) {
1801         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1802         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1803         aa = a->a + j*bs2;
1804         for (k=0; k<bs; k++) {
1805           for (l=0; l<bs; l++) {
1806             if (PetscRealPart(*aa++) >=  0.) continue;
1807             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1808           }
1809         }
1810       }
1811     }
1812     color = PETSC_DRAW_CYAN;
1813     for (i=0,row=0; i<mbs; i++,row+=bs) {
1814       for (j=a->i[i]; j<a->i[i+1]; j++) {
1815         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1816         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1817         aa = a->a + j*bs2;
1818         for (k=0; k<bs; k++) {
1819           for (l=0; l<bs; l++) {
1820             if (PetscRealPart(*aa++) != 0.) continue;
1821             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1822           }
1823         }
1824       }
1825     }
1826     color = PETSC_DRAW_RED;
1827     for (i=0,row=0; i<mbs; i++,row+=bs) {
1828       for (j=a->i[i]; j<a->i[i+1]; j++) {
1829         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1830         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1831         aa = a->a + j*bs2;
1832         for (k=0; k<bs; k++) {
1833           for (l=0; l<bs; l++) {
1834             if (PetscRealPart(*aa++) <= 0.) continue;
1835             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1836           }
1837         }
1838       }
1839     }
1840   } else {
1841     /* use contour shading to indicate magnitude of values */
1842     /* first determine max of all nonzero values */
1843     PetscDraw   popup;
1844     PetscReal scale,maxv = 0.0;
1845 
1846     for (i=0; i<a->nz*a->bs2; i++) {
1847       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1848     }
1849     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1850     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1851     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1852     for (i=0,row=0; i<mbs; i++,row+=bs) {
1853       for (j=a->i[i]; j<a->i[i+1]; j++) {
1854         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1855         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1856         aa = a->a + j*bs2;
1857         for (k=0; k<bs; k++) {
1858           for (l=0; l<bs; l++) {
1859             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1860             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1861           }
1862         }
1863       }
1864     }
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1871 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1872 {
1873   PetscErrorCode ierr;
1874   PetscReal      xl,yl,xr,yr,w,h;
1875   PetscDraw      draw;
1876   PetscBool      isnull;
1877 
1878   PetscFunctionBegin;
1879 
1880   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1881   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1882 
1883   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1884   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1885   xr += w;    yr += h;  xl = -w;     yl = -h;
1886   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1887   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1888   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatView_SeqBAIJ"
1894 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1895 {
1896   PetscErrorCode ierr;
1897   PetscBool      iascii,isbinary,isdraw;
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1901   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1902   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1903   if (iascii){
1904     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1905   } else if (isbinary) {
1906     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1907   } else if (isdraw) {
1908     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1909   } else {
1910     Mat B;
1911     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1912     ierr = MatView(B,viewer);CHKERRQ(ierr);
1913     ierr = MatDestroy(&B);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1921 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1922 {
1923   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1924   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1925   PetscInt    *ai = a->i,*ailen = a->ilen;
1926   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1927   MatScalar   *ap,*aa = a->a;
1928 
1929   PetscFunctionBegin;
1930   for (k=0; k<m; k++) { /* loop over rows */
1931     row  = im[k]; brow = row/bs;
1932     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1933     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1934     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1935     nrow = ailen[brow];
1936     for (l=0; l<n; l++) { /* loop over columns */
1937       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1938       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1939       col  = in[l] ;
1940       bcol = col/bs;
1941       cidx = col%bs;
1942       ridx = row%bs;
1943       high = nrow;
1944       low  = 0; /* assume unsorted */
1945       while (high-low > 5) {
1946         t = (low+high)/2;
1947         if (rp[t] > bcol) high = t;
1948         else             low  = t;
1949       }
1950       for (i=low; i<high; i++) {
1951         if (rp[i] > bcol) break;
1952         if (rp[i] == bcol) {
1953           *v++ = ap[bs2*i+bs*cidx+ridx];
1954           goto finished;
1955         }
1956       }
1957       *v++ = 0.0;
1958       finished:;
1959     }
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1966 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1967 {
1968   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1969   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1970   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1971   PetscErrorCode    ierr;
1972   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1973   PetscBool         roworiented=a->roworiented;
1974   const PetscScalar *value = v;
1975   MatScalar         *ap,*aa = a->a,*bap;
1976 
1977   PetscFunctionBegin;
1978   if (roworiented) {
1979     stepval = (n-1)*bs;
1980   } else {
1981     stepval = (m-1)*bs;
1982   }
1983   for (k=0; k<m; k++) { /* loop over added rows */
1984     row  = im[k];
1985     if (row < 0) continue;
1986 #if defined(PETSC_USE_DEBUG)
1987     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1988 #endif
1989     rp   = aj + ai[row];
1990     ap   = aa + bs2*ai[row];
1991     rmax = imax[row];
1992     nrow = ailen[row];
1993     low  = 0;
1994     high = nrow;
1995     for (l=0; l<n; l++) { /* loop over added columns */
1996       if (in[l] < 0) continue;
1997 #if defined(PETSC_USE_DEBUG)
1998       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1999 #endif
2000       col = in[l];
2001       if (roworiented) {
2002         value = v + (k*(stepval+bs) + l)*bs;
2003       } else {
2004         value = v + (l*(stepval+bs) + k)*bs;
2005       }
2006       if (col <= lastcol) low = 0; else high = nrow;
2007       lastcol = col;
2008       while (high-low > 7) {
2009         t = (low+high)/2;
2010         if (rp[t] > col) high = t;
2011         else             low  = t;
2012       }
2013       for (i=low; i<high; i++) {
2014         if (rp[i] > col) break;
2015         if (rp[i] == col) {
2016           bap  = ap +  bs2*i;
2017           if (roworiented) {
2018             if (is == ADD_VALUES) {
2019               for (ii=0; ii<bs; ii++,value+=stepval) {
2020                 for (jj=ii; jj<bs2; jj+=bs) {
2021                   bap[jj] += *value++;
2022                 }
2023               }
2024             } else {
2025               for (ii=0; ii<bs; ii++,value+=stepval) {
2026                 for (jj=ii; jj<bs2; jj+=bs) {
2027                   bap[jj] = *value++;
2028                 }
2029               }
2030             }
2031           } else {
2032             if (is == ADD_VALUES) {
2033               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2034                 for (jj=0; jj<bs; jj++) {
2035                   bap[jj] += value[jj];
2036                 }
2037                 bap += bs;
2038               }
2039             } else {
2040               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2041                 for (jj=0; jj<bs; jj++) {
2042                   bap[jj]  = value[jj];
2043                 }
2044                 bap += bs;
2045               }
2046             }
2047           }
2048           goto noinsert2;
2049         }
2050       }
2051       if (nonew == 1) goto noinsert2;
2052       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2053       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2054       N = nrow++ - 1; high++;
2055       /* shift up all the later entries in this row */
2056       for (ii=N; ii>=i; ii--) {
2057         rp[ii+1] = rp[ii];
2058         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2059       }
2060       if (N >= i) {
2061         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2062       }
2063       rp[i] = col;
2064       bap   = ap +  bs2*i;
2065       if (roworiented) {
2066         for (ii=0; ii<bs; ii++,value+=stepval) {
2067           for (jj=ii; jj<bs2; jj+=bs) {
2068             bap[jj] = *value++;
2069           }
2070         }
2071       } else {
2072         for (ii=0; ii<bs; ii++,value+=stepval) {
2073           for (jj=0; jj<bs; jj++) {
2074             *bap++  = *value++;
2075           }
2076         }
2077       }
2078       noinsert2:;
2079       low = i;
2080     }
2081     ailen[row] = nrow;
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2088 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2089 {
2090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2091   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2092   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2093   PetscErrorCode ierr;
2094   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2095   MatScalar      *aa = a->a,*ap;
2096   PetscReal      ratio=0.6;
2097 
2098   PetscFunctionBegin;
2099   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2100 
2101   if (m) rmax = ailen[0];
2102   for (i=1; i<mbs; i++) {
2103     /* move each row back by the amount of empty slots (fshift) before it*/
2104     fshift += imax[i-1] - ailen[i-1];
2105     rmax   = PetscMax(rmax,ailen[i]);
2106     if (fshift) {
2107       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2108       N = ailen[i];
2109       for (j=0; j<N; j++) {
2110         ip[j-fshift] = ip[j];
2111         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2112       }
2113     }
2114     ai[i] = ai[i-1] + ailen[i-1];
2115   }
2116   if (mbs) {
2117     fshift += imax[mbs-1] - ailen[mbs-1];
2118     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2119   }
2120   /* reset ilen and imax for each row */
2121   for (i=0; i<mbs; i++) {
2122     ailen[i] = imax[i] = ai[i+1] - ai[i];
2123   }
2124   a->nz = ai[mbs];
2125 
2126   /* diagonals may have moved, so kill the diagonal pointers */
2127   a->idiagvalid = PETSC_FALSE;
2128   if (fshift && a->diag) {
2129     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2130     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2131     a->diag = 0;
2132   }
2133   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2134   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2135   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2136   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2137   A->info.mallocs     += a->reallocs;
2138   a->reallocs          = 0;
2139   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2140 
2141   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2142   A->same_nonzero = PETSC_TRUE;
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 /*
2147    This function returns an array of flags which indicate the locations of contiguous
2148    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2149    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2150    Assume: sizes should be long enough to hold all the values.
2151 */
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2154 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2155 {
2156   PetscInt   i,j,k,row;
2157   PetscBool  flg;
2158 
2159   PetscFunctionBegin;
2160   for (i=0,j=0; i<n; j++) {
2161     row = idx[i];
2162     if (row%bs!=0) { /* Not the begining of a block */
2163       sizes[j] = 1;
2164       i++;
2165     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2166       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2167       i++;
2168     } else { /* Begining of the block, so check if the complete block exists */
2169       flg = PETSC_TRUE;
2170       for (k=1; k<bs; k++) {
2171         if (row+k != idx[i+k]) { /* break in the block */
2172           flg = PETSC_FALSE;
2173           break;
2174         }
2175       }
2176       if (flg) { /* No break in the bs */
2177         sizes[j] = bs;
2178         i+= bs;
2179       } else {
2180         sizes[j] = 1;
2181         i++;
2182       }
2183     }
2184   }
2185   *bs_max = j;
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2191 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2192 {
2193   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2194   PetscErrorCode    ierr;
2195   PetscInt          i,j,k,count,*rows;
2196   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2197   PetscScalar       zero = 0.0;
2198   MatScalar         *aa;
2199   const PetscScalar *xx;
2200   PetscScalar       *bb;
2201 
2202   PetscFunctionBegin;
2203   /* fix right hand side if needed */
2204   if (x && b) {
2205     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2206     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2207     for (i=0; i<is_n; i++) {
2208       bb[is_idx[i]] = diag*xx[is_idx[i]];
2209     }
2210     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2211     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2212   }
2213 
2214   /* Make a copy of the IS and  sort it */
2215   /* allocate memory for rows,sizes */
2216   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2217 
2218   /* copy IS values to rows, and sort them */
2219   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2220   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2221 
2222   if (baij->keepnonzeropattern) {
2223     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2224     bs_max = is_n;
2225     A->same_nonzero = PETSC_TRUE;
2226   } else {
2227     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2228     A->same_nonzero = PETSC_FALSE;
2229   }
2230 
2231   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2232     row   = rows[j];
2233     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2234     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2235     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2236     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2237       if (diag != (PetscScalar)0.0) {
2238         if (baij->ilen[row/bs] > 0) {
2239           baij->ilen[row/bs]       = 1;
2240           baij->j[baij->i[row/bs]] = row/bs;
2241           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2242         }
2243         /* Now insert all the diagonal values for this bs */
2244         for (k=0; k<bs; k++) {
2245           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2246         }
2247       } else { /* (diag == 0.0) */
2248         baij->ilen[row/bs] = 0;
2249       } /* end (diag == 0.0) */
2250     } else { /* (sizes[i] != bs) */
2251 #if defined (PETSC_USE_DEBUG)
2252       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2253 #endif
2254       for (k=0; k<count; k++) {
2255         aa[0] =  zero;
2256         aa    += bs;
2257       }
2258       if (diag != (PetscScalar)0.0) {
2259         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2260       }
2261     }
2262   }
2263 
2264   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2265   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2271 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2272 {
2273   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2274   PetscErrorCode    ierr;
2275   PetscInt          i,j,k,count;
2276   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2277   PetscScalar       zero = 0.0;
2278   MatScalar         *aa;
2279   const PetscScalar *xx;
2280   PetscScalar       *bb;
2281   PetscBool         *zeroed,vecs = PETSC_FALSE;
2282 
2283   PetscFunctionBegin;
2284   /* fix right hand side if needed */
2285   if (x && b) {
2286     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2287     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2288     vecs = PETSC_TRUE;
2289   }
2290   A->same_nonzero = PETSC_TRUE;
2291 
2292   /* zero the columns */
2293   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2294   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2295   for (i=0; i<is_n; i++) {
2296     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2297     zeroed[is_idx[i]] = PETSC_TRUE;
2298   }
2299   for (i=0; i<A->rmap->N; i++) {
2300     if (!zeroed[i]) {
2301       row = i/bs;
2302       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2303         for (k=0; k<bs; k++) {
2304           col = bs*baij->j[j] + k;
2305 	  if (zeroed[col]) {
2306 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2307             if (vecs) bb[i] -= aa[0]*xx[col];
2308             aa[0] = 0.0;
2309           }
2310         }
2311       }
2312     } else if (vecs) bb[i] = diag*xx[i];
2313   }
2314   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2315   if (vecs) {
2316     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2317     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2318   }
2319 
2320   /* zero the rows */
2321   for (i=0; i<is_n; i++) {
2322     row   = is_idx[i];
2323     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2324     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2325     for (k=0; k<count; k++) {
2326       aa[0] =  zero;
2327       aa    += bs;
2328     }
2329     if (diag != (PetscScalar)0.0) {
2330       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2331     }
2332   }
2333   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2339 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2340 {
2341   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2342   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2343   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2344   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2345   PetscErrorCode ierr;
2346   PetscInt       ridx,cidx,bs2=a->bs2;
2347   PetscBool      roworiented=a->roworiented;
2348   MatScalar      *ap,value,*aa=a->a,*bap;
2349 
2350   PetscFunctionBegin;
2351   if (v) PetscValidScalarPointer(v,6);
2352   for (k=0; k<m; k++) { /* loop over added rows */
2353     row  = im[k];
2354     brow = row/bs;
2355     if (row < 0) continue;
2356 #if defined(PETSC_USE_DEBUG)
2357     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2358 #endif
2359     rp   = aj + ai[brow];
2360     ap   = aa + bs2*ai[brow];
2361     rmax = imax[brow];
2362     nrow = ailen[brow];
2363     low  = 0;
2364     high = nrow;
2365     for (l=0; l<n; l++) { /* loop over added columns */
2366       if (in[l] < 0) continue;
2367 #if defined(PETSC_USE_DEBUG)
2368       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2369 #endif
2370       col = in[l]; bcol = col/bs;
2371       ridx = row % bs; cidx = col % bs;
2372       if (roworiented) {
2373         value = v[l + k*n];
2374       } else {
2375         value = v[k + l*m];
2376       }
2377       if (col <= lastcol) low = 0; else high = nrow;
2378       lastcol = col;
2379       while (high-low > 7) {
2380         t = (low+high)/2;
2381         if (rp[t] > bcol) high = t;
2382         else              low  = t;
2383       }
2384       for (i=low; i<high; i++) {
2385         if (rp[i] > bcol) break;
2386         if (rp[i] == bcol) {
2387           bap  = ap +  bs2*i + bs*cidx + ridx;
2388           if (is == ADD_VALUES) *bap += value;
2389           else                  *bap  = value;
2390           goto noinsert1;
2391         }
2392       }
2393       if (nonew == 1) goto noinsert1;
2394       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2395       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2396       N = nrow++ - 1; high++;
2397       /* shift up all the later entries in this row */
2398       for (ii=N; ii>=i; ii--) {
2399         rp[ii+1] = rp[ii];
2400         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2401       }
2402       if (N>=i) {
2403         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2404       }
2405       rp[i]                      = bcol;
2406       ap[bs2*i + bs*cidx + ridx] = value;
2407       a->nz++;
2408       noinsert1:;
2409       low = i;
2410     }
2411     ailen[brow] = nrow;
2412   }
2413   A->same_nonzero = PETSC_FALSE;
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2419 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2420 {
2421   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2422   Mat            outA;
2423   PetscErrorCode ierr;
2424   PetscBool      row_identity,col_identity;
2425 
2426   PetscFunctionBegin;
2427   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2428   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2429   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2430   if (!row_identity || !col_identity) {
2431     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2432   }
2433 
2434   outA            = inA;
2435   inA->factortype = MAT_FACTOR_LU;
2436 
2437   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2438 
2439   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2441   a->row = row;
2442   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2443   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2444   a->col = col;
2445 
2446   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2447   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2448    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2450 
2451   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2452   if (!a->solve_work) {
2453     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2454     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2455   }
2456   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2457 
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 EXTERN_C_BEGIN
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2464 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2465 {
2466   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2467   PetscInt    i,nz,mbs;
2468 
2469   PetscFunctionBegin;
2470   nz  = baij->maxnz;
2471   mbs = baij->mbs;
2472   for (i=0; i<nz; i++) {
2473     baij->j[i] = indices[i];
2474   }
2475   baij->nz = nz;
2476   for (i=0; i<mbs; i++) {
2477     baij->ilen[i] = baij->imax[i];
2478   }
2479   PetscFunctionReturn(0);
2480 }
2481 EXTERN_C_END
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2485 /*@
2486     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2487        in the matrix.
2488 
2489   Input Parameters:
2490 +  mat - the SeqBAIJ matrix
2491 -  indices - the column indices
2492 
2493   Level: advanced
2494 
2495   Notes:
2496     This can be called if you have precomputed the nonzero structure of the
2497   matrix and want to provide it to the matrix object to improve the performance
2498   of the MatSetValues() operation.
2499 
2500     You MUST have set the correct numbers of nonzeros per row in the call to
2501   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2502 
2503     MUST be called before any calls to MatSetValues();
2504 
2505 @*/
2506 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2512   PetscValidPointer(indices,2);
2513   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2519 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2520 {
2521   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2522   PetscErrorCode ierr;
2523   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2524   PetscReal      atmp;
2525   PetscScalar    *x,zero = 0.0;
2526   MatScalar      *aa;
2527   PetscInt       ncols,brow,krow,kcol;
2528 
2529   PetscFunctionBegin;
2530   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2531   bs   = A->rmap->bs;
2532   aa   = a->a;
2533   ai   = a->i;
2534   aj   = a->j;
2535   mbs  = a->mbs;
2536 
2537   ierr = VecSet(v,zero);CHKERRQ(ierr);
2538   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2539   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2540   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2541   for (i=0; i<mbs; i++) {
2542     ncols = ai[1] - ai[0]; ai++;
2543     brow  = bs*i;
2544     for (j=0; j<ncols; j++){
2545       for (kcol=0; kcol<bs; kcol++){
2546         for (krow=0; krow<bs; krow++){
2547           atmp = PetscAbsScalar(*aa);aa++;
2548           row = brow + krow;    /* row index */
2549           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2550           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2551         }
2552       }
2553       aj++;
2554     }
2555   }
2556   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatCopy_SeqBAIJ"
2562 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2563 {
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   /* If the two matrices have the same copy implementation, use fast copy. */
2568   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2569     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2570     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2571     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2572 
2573     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2574     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2575     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2576   } else {
2577     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2584 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2585 {
2586   PetscErrorCode ierr;
2587 
2588   PetscFunctionBegin;
2589   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 #undef __FUNCT__
2594 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2595 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2596 {
2597   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2598   PetscFunctionBegin;
2599   *array = a->a;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2605 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2606 {
2607   PetscFunctionBegin;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2613 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2614 {
2615   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2616   PetscErrorCode ierr;
2617   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2618   PetscBLASInt   one=1;
2619 
2620   PetscFunctionBegin;
2621   if (str == SAME_NONZERO_PATTERN) {
2622     PetscScalar alpha = a;
2623     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2624     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2625   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2626     if (y->xtoy && y->XtoY != X) {
2627       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2628       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2629     }
2630     if (!y->xtoy) { /* get xtoy */
2631       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2632       y->XtoY = X;
2633       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2634     }
2635     for (i=0; i<x->nz; i++) {
2636       j = 0;
2637       while (j < bs2){
2638         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2639         j++;
2640       }
2641     }
2642     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2643   } else {
2644     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2651 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2652 {
2653   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2654   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2655   MatScalar      *aa = a->a;
2656 
2657   PetscFunctionBegin;
2658   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2664 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2665 {
2666   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2667   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2668   MatScalar      *aa = a->a;
2669 
2670   PetscFunctionBegin;
2671   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2679 /*
2680     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2681 */
2682 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2683 {
2684   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2685   PetscErrorCode ierr;
2686   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2687   PetscInt       nz = a->i[m],row,*jj,mr,col;
2688 
2689   PetscFunctionBegin;
2690   *nn = n;
2691   if (!ia) PetscFunctionReturn(0);
2692   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2693   else {
2694     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2695     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2697     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2698     jj = a->j;
2699     for (i=0; i<nz; i++) {
2700       collengths[jj[i]]++;
2701     }
2702     cia[0] = oshift;
2703     for (i=0; i<n; i++) {
2704       cia[i+1] = cia[i] + collengths[i];
2705     }
2706     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2707     jj   = a->j;
2708     for (row=0; row<m; row++) {
2709       mr = a->i[row+1] - a->i[row];
2710       for (i=0; i<mr; i++) {
2711         col = *jj++;
2712         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2713       }
2714     }
2715     ierr = PetscFree(collengths);CHKERRQ(ierr);
2716     *ia = cia; *ja = cja;
2717   }
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2723 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2724 {
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   if (!ia) PetscFunctionReturn(0);
2729   ierr = PetscFree(*ia);CHKERRQ(ierr);
2730   ierr = PetscFree(*ja);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2736 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2737 {
2738   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2739   PetscErrorCode ierr;
2740   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2741   PetscScalar    dx,*y,*xx,*w3_array;
2742   PetscScalar    *vscale_array;
2743   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2744   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2745   void           *fctx = coloring->fctx;
2746   PetscBool      flg = PETSC_FALSE;
2747   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2748   Vec            x1_tmp;
2749 
2750   PetscFunctionBegin;
2751   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2752   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2753   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2754   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2755 
2756   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2757   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2758   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2759   if (flg) {
2760     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2761   } else {
2762     PetscBool  assembled;
2763     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2764     if (assembled) {
2765       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2766     }
2767   }
2768 
2769   x1_tmp = x1;
2770   if (!coloring->vscale){
2771     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2772   }
2773 
2774   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2775     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2776   }
2777   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2778 
2779   /* Set w1 = F(x1) */
2780   if (!coloring->fset) {
2781     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2782     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2783     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2784   } else {
2785     coloring->fset = PETSC_FALSE;
2786   }
2787 
2788   if (!coloring->w3) {
2789     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2791   }
2792   w3 = coloring->w3;
2793 
2794     CHKMEMQ;
2795     /* Compute all the local scale factors, including ghost points */
2796   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2797   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2798   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2799   if (ctype == IS_COLORING_GHOSTED){
2800     col_start = 0; col_end = N;
2801   } else if (ctype == IS_COLORING_GLOBAL){
2802     xx = xx - start;
2803     vscale_array = vscale_array - start;
2804     col_start = start; col_end = N + start;
2805   }    CHKMEMQ;
2806   for (col=col_start; col<col_end; col++){
2807     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2808     if (coloring->htype[0] == 'w') {
2809       dx = 1.0 + unorm;
2810     } else {
2811       dx  = xx[col];
2812     }
2813     if (dx == (PetscScalar)0.0) dx = 1.0;
2814     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2815     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2816     dx               *= epsilon;
2817     vscale_array[col] = (PetscScalar)1.0/dx;
2818   }     CHKMEMQ;
2819   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2820   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2821   if (ctype == IS_COLORING_GLOBAL){
2822     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2823     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2824   }
2825   CHKMEMQ;
2826   if (coloring->vscaleforrow) {
2827     vscaleforrow = coloring->vscaleforrow;
2828   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2829 
2830   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2831   /*
2832     Loop over each color
2833   */
2834   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2835   for (k=0; k<coloring->ncolors; k++) {
2836     coloring->currentcolor = k;
2837     for (i=0; i<bs; i++) {
2838       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2839       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2840       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2841       /*
2842 	Loop over each column associated with color
2843 	adding the perturbation to the vector w3.
2844       */
2845       for (l=0; l<coloring->ncolumns[k]; l++) {
2846 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2847 	if (coloring->htype[0] == 'w') {
2848 	  dx = 1.0 + unorm;
2849 	} else {
2850 	  dx  = xx[col];
2851 	}
2852 	if (dx == (PetscScalar)0.0) dx = 1.0;
2853 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2854 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2855 	dx            *= epsilon;
2856 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2857 	w3_array[col] += dx;
2858       }
2859       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2860       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2861 
2862       /*
2863 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2864 	w2 = F(x1 + dx) - F(x1)
2865       */
2866       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2867       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2868       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2869       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2870 
2871       /*
2872 	Loop over rows of vector, putting results into Jacobian matrix
2873       */
2874       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2875       for (l=0; l<coloring->nrows[k]; l++) {
2876 	row    = bs*coloring->rows[k][l];             /* local row index */
2877 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2878         for (j=0; j<bs; j++) {
2879   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2880           srows[j]  = row + start + j;
2881         }
2882 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2883       }
2884       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2885     }
2886   } /* endof for each color */
2887   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2888   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2889   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2890   ierr = PetscFree(srows);CHKERRQ(ierr);
2891 
2892   coloring->currentcolor = -1;
2893   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2894   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2895   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2896   PetscFunctionReturn(0);
2897 }
2898 
2899 /* -------------------------------------------------------------------*/
2900 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2901        MatGetRow_SeqBAIJ,
2902        MatRestoreRow_SeqBAIJ,
2903        MatMult_SeqBAIJ_N,
2904 /* 4*/ MatMultAdd_SeqBAIJ_N,
2905        MatMultTranspose_SeqBAIJ,
2906        MatMultTransposeAdd_SeqBAIJ,
2907        0,
2908        0,
2909        0,
2910 /*10*/ 0,
2911        MatLUFactor_SeqBAIJ,
2912        0,
2913        0,
2914        MatTranspose_SeqBAIJ,
2915 /*15*/ MatGetInfo_SeqBAIJ,
2916        MatEqual_SeqBAIJ,
2917        MatGetDiagonal_SeqBAIJ,
2918        MatDiagonalScale_SeqBAIJ,
2919        MatNorm_SeqBAIJ,
2920 /*20*/ 0,
2921        MatAssemblyEnd_SeqBAIJ,
2922        MatSetOption_SeqBAIJ,
2923        MatZeroEntries_SeqBAIJ,
2924 /*24*/ MatZeroRows_SeqBAIJ,
2925        0,
2926        0,
2927        0,
2928        0,
2929 /*29*/ MatSetUp_SeqBAIJ,
2930        0,
2931        0,
2932        0,
2933        0,
2934 /*34*/ MatDuplicate_SeqBAIJ,
2935        0,
2936        0,
2937        MatILUFactor_SeqBAIJ,
2938        0,
2939 /*39*/ MatAXPY_SeqBAIJ,
2940        MatGetSubMatrices_SeqBAIJ,
2941        MatIncreaseOverlap_SeqBAIJ,
2942        MatGetValues_SeqBAIJ,
2943        MatCopy_SeqBAIJ,
2944 /*44*/ 0,
2945        MatScale_SeqBAIJ,
2946        0,
2947        0,
2948        MatZeroRowsColumns_SeqBAIJ,
2949 /*49*/ 0,
2950        MatGetRowIJ_SeqBAIJ,
2951        MatRestoreRowIJ_SeqBAIJ,
2952        MatGetColumnIJ_SeqBAIJ,
2953        MatRestoreColumnIJ_SeqBAIJ,
2954 /*54*/ MatFDColoringCreate_SeqAIJ,
2955        0,
2956        0,
2957        0,
2958        MatSetValuesBlocked_SeqBAIJ,
2959 /*59*/ MatGetSubMatrix_SeqBAIJ,
2960        MatDestroy_SeqBAIJ,
2961        MatView_SeqBAIJ,
2962        0,
2963        0,
2964 /*64*/ 0,
2965        0,
2966        0,
2967        0,
2968        0,
2969 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2970        0,
2971        MatConvert_Basic,
2972        0,
2973        0,
2974 /*74*/ 0,
2975        MatFDColoringApply_BAIJ,
2976        0,
2977        0,
2978        0,
2979 /*79*/ 0,
2980        0,
2981        0,
2982        0,
2983        MatLoad_SeqBAIJ,
2984 /*84*/ 0,
2985        0,
2986        0,
2987        0,
2988        0,
2989 /*89*/ 0,
2990        0,
2991        0,
2992        0,
2993        0,
2994 /*94*/ 0,
2995        0,
2996        0,
2997        0,
2998        0,
2999 /*99*/0,
3000        0,
3001        0,
3002        0,
3003        0,
3004 /*104*/0,
3005        MatRealPart_SeqBAIJ,
3006        MatImaginaryPart_SeqBAIJ,
3007        0,
3008        0,
3009 /*109*/0,
3010        0,
3011        0,
3012        0,
3013        MatMissingDiagonal_SeqBAIJ,
3014 /*114*/0,
3015        0,
3016        0,
3017        0,
3018        0,
3019 /*119*/0,
3020        0,
3021        MatMultHermitianTranspose_SeqBAIJ,
3022        MatMultHermitianTransposeAdd_SeqBAIJ,
3023        0,
3024 /*124*/0,
3025        0,
3026        MatInvertBlockDiagonal_SeqBAIJ
3027 };
3028 
3029 EXTERN_C_BEGIN
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3032 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3033 {
3034   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3035   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3036   PetscErrorCode ierr;
3037 
3038   PetscFunctionBegin;
3039   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3040 
3041   /* allocate space for values if not already there */
3042   if (!aij->saved_values) {
3043     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3044     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3045   }
3046 
3047   /* copy values over */
3048   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3049   PetscFunctionReturn(0);
3050 }
3051 EXTERN_C_END
3052 
3053 EXTERN_C_BEGIN
3054 #undef __FUNCT__
3055 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3056 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3057 {
3058   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3059   PetscErrorCode ierr;
3060   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3061 
3062   PetscFunctionBegin;
3063   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3064   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3065 
3066   /* copy values over */
3067   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 EXTERN_C_END
3071 
3072 EXTERN_C_BEGIN
3073 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3074 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3075 EXTERN_C_END
3076 
3077 EXTERN_C_BEGIN
3078 #undef __FUNCT__
3079 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3080 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3081 {
3082   Mat_SeqBAIJ    *b;
3083   PetscErrorCode ierr;
3084   PetscInt       i,mbs,nbs,bs2;
3085   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3086 
3087   PetscFunctionBegin;
3088   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3089   if (nz == MAT_SKIP_ALLOCATION) {
3090     skipallocation = PETSC_TRUE;
3091     nz             = 0;
3092   }
3093 
3094   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3095   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3096   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3097   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3098   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3099 
3100   B->preallocated = PETSC_TRUE;
3101 
3102   mbs  = B->rmap->n/bs;
3103   nbs  = B->cmap->n/bs;
3104   bs2  = bs*bs;
3105 
3106   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3107 
3108   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3109   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3110   if (nnz) {
3111     for (i=0; i<mbs; i++) {
3112       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3113       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3114     }
3115   }
3116 
3117   b       = (Mat_SeqBAIJ*)B->data;
3118   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3119     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3120   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3121 
3122   if (!flg) {
3123     switch (bs) {
3124     case 1:
3125       B->ops->mult            = MatMult_SeqBAIJ_1;
3126       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3127       B->ops->sor             = MatSOR_SeqBAIJ_1;
3128       break;
3129     case 2:
3130       B->ops->mult            = MatMult_SeqBAIJ_2;
3131       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3132       B->ops->sor             = MatSOR_SeqBAIJ_2;
3133       break;
3134     case 3:
3135       B->ops->mult            = MatMult_SeqBAIJ_3;
3136       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3137       B->ops->sor             = MatSOR_SeqBAIJ_3;
3138       break;
3139     case 4:
3140       B->ops->mult            = MatMult_SeqBAIJ_4;
3141       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3142       B->ops->sor             = MatSOR_SeqBAIJ_4;
3143       break;
3144     case 5:
3145       B->ops->mult            = MatMult_SeqBAIJ_5;
3146       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3147       B->ops->sor             = MatSOR_SeqBAIJ_5;
3148       break;
3149     case 6:
3150       B->ops->mult            = MatMult_SeqBAIJ_6;
3151       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3152       B->ops->sor             = MatSOR_SeqBAIJ_6;
3153       break;
3154     case 7:
3155       B->ops->mult            = MatMult_SeqBAIJ_7;
3156       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3157       B->ops->sor             = MatSOR_SeqBAIJ_7;
3158       break;
3159     case 15:
3160       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3161       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3162       B->ops->sor             = MatSOR_SeqBAIJ_N;
3163       break;
3164     default:
3165       B->ops->mult            = MatMult_SeqBAIJ_N;
3166       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3167       B->ops->sor             = MatSOR_SeqBAIJ_N;
3168       break;
3169     }
3170   }
3171   b->mbs       = mbs;
3172   b->nbs       = nbs;
3173   if (!skipallocation) {
3174     if (!b->imax) {
3175       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3176       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3177       b->free_imax_ilen = PETSC_TRUE;
3178     }
3179     /* b->ilen will count nonzeros in each block row so far. */
3180     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3181     if (!nnz) {
3182       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3183       else if (nz < 0) nz = 1;
3184       for (i=0; i<mbs; i++) b->imax[i] = nz;
3185       nz = nz*mbs;
3186     } else {
3187       nz = 0;
3188       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3189     }
3190 
3191     /* allocate the matrix space */
3192     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3193     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3194     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3195     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3196     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3197     b->singlemalloc = PETSC_TRUE;
3198     b->i[0] = 0;
3199     for (i=1; i<mbs+1; i++) {
3200       b->i[i] = b->i[i-1] + b->imax[i-1];
3201     }
3202     b->free_a     = PETSC_TRUE;
3203     b->free_ij    = PETSC_TRUE;
3204   } else {
3205     b->free_a     = PETSC_FALSE;
3206     b->free_ij    = PETSC_FALSE;
3207   }
3208 
3209   b->bs2              = bs2;
3210   b->mbs              = mbs;
3211   b->nz               = 0;
3212   b->maxnz            = nz;
3213   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3214   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3215   PetscFunctionReturn(0);
3216 }
3217 EXTERN_C_END
3218 
3219 EXTERN_C_BEGIN
3220 #undef __FUNCT__
3221 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3222 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3223 {
3224   PetscInt       i,m,nz,nz_max=0,*nnz;
3225   PetscScalar    *values=0;
3226   PetscErrorCode ierr;
3227 
3228   PetscFunctionBegin;
3229   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3230   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3231   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3232   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3233   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3234   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3235   m = B->rmap->n/bs;
3236 
3237   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3238   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3239   for (i=0; i<m; i++) {
3240     nz = ii[i+1]- ii[i];
3241     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3242     nz_max = PetscMax(nz_max, nz);
3243     nnz[i] = nz;
3244   }
3245   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3246   ierr = PetscFree(nnz);CHKERRQ(ierr);
3247 
3248   values = (PetscScalar*)V;
3249   if (!values) {
3250     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3251     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3252   }
3253   for (i=0; i<m; i++) {
3254     PetscInt          ncols  = ii[i+1] - ii[i];
3255     const PetscInt    *icols = jj + ii[i];
3256     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3257     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3258   }
3259   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3260   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3261   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3262   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3263   PetscFunctionReturn(0);
3264 }
3265 EXTERN_C_END
3266 
3267 
3268 EXTERN_C_BEGIN
3269 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3270 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3271 #if defined(PETSC_HAVE_MUMPS)
3272 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3273 #endif
3274 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3275 EXTERN_C_END
3276 
3277 /*MC
3278    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3279    block sparse compressed row format.
3280 
3281    Options Database Keys:
3282 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3283 
3284   Level: beginner
3285 
3286 .seealso: MatCreateSeqBAIJ()
3287 M*/
3288 
3289 EXTERN_C_BEGIN
3290 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3291 EXTERN_C_END
3292 
3293 EXTERN_C_BEGIN
3294 #undef __FUNCT__
3295 #define __FUNCT__ "MatCreate_SeqBAIJ"
3296 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3297 {
3298   PetscErrorCode ierr;
3299   PetscMPIInt    size;
3300   Mat_SeqBAIJ    *b;
3301 
3302   PetscFunctionBegin;
3303   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3304   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3305 
3306   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3307   B->data = (void*)b;
3308   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3309   b->row                   = 0;
3310   b->col                   = 0;
3311   b->icol                  = 0;
3312   b->reallocs              = 0;
3313   b->saved_values          = 0;
3314 
3315   b->roworiented           = PETSC_TRUE;
3316   b->nonew                 = 0;
3317   b->diag                  = 0;
3318   b->solve_work            = 0;
3319   b->mult_work             = 0;
3320   B->spptr                 = 0;
3321   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3322   b->keepnonzeropattern    = PETSC_FALSE;
3323   b->xtoy                  = 0;
3324   b->XtoY                  = 0;
3325   B->same_nonzero          = PETSC_FALSE;
3326 
3327   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3328                                      "MatGetFactorAvailable_seqbaij_petsc",
3329                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3330   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3331                                      "MatGetFactor_seqbaij_petsc",
3332                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3334                                      "MatGetFactor_seqbaij_bstrm",
3335                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3336 #if defined(PETSC_HAVE_MUMPS)
3337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3338 #endif
3339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3340                                      "MatInvertBlockDiagonal_SeqBAIJ",
3341                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3343                                      "MatStoreValues_SeqBAIJ",
3344                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3346                                      "MatRetrieveValues_SeqBAIJ",
3347                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3349                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3350                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3352                                      "MatConvert_SeqBAIJ_SeqAIJ",
3353                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3355                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3356                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3358                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3359                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3361                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3362                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3364                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3365                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3367                                      "MatIsTranspose_SeqBAIJ",
3368                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3369   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3370   PetscFunctionReturn(0);
3371 }
3372 EXTERN_C_END
3373 
3374 #undef __FUNCT__
3375 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3376 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3377 {
3378   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3379   PetscErrorCode ierr;
3380   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3381 
3382   PetscFunctionBegin;
3383   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3384 
3385   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3386     c->imax = a->imax;
3387     c->ilen = a->ilen;
3388     c->free_imax_ilen = PETSC_FALSE;
3389   } else {
3390     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3391     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3392     for (i=0; i<mbs; i++) {
3393       c->imax[i] = a->imax[i];
3394       c->ilen[i] = a->ilen[i];
3395     }
3396     c->free_imax_ilen = PETSC_TRUE;
3397   }
3398 
3399   /* allocate the matrix space */
3400   if (mallocmatspace){
3401     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3402       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3403       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3404       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3405       c->i            = a->i;
3406       c->j            = a->j;
3407       c->singlemalloc = PETSC_FALSE;
3408       c->free_a       = PETSC_TRUE;
3409       c->free_ij      = PETSC_FALSE;
3410       c->parent       = A;
3411       C->preallocated = PETSC_TRUE;
3412       C->assembled    = PETSC_TRUE;
3413       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3414       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3415       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3416     } else {
3417       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3418       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3419       c->singlemalloc = PETSC_TRUE;
3420       c->free_a       = PETSC_TRUE;
3421       c->free_ij      = PETSC_TRUE;
3422       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3423       if (mbs > 0) {
3424 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3425 	if (cpvalues == MAT_COPY_VALUES) {
3426 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3427 	} else {
3428 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3429 	}
3430       }
3431       C->preallocated = PETSC_TRUE;
3432       C->assembled    = PETSC_TRUE;
3433     }
3434   }
3435 
3436   c->roworiented = a->roworiented;
3437   c->nonew       = a->nonew;
3438   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3439   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3440   c->bs2         = a->bs2;
3441   c->mbs         = a->mbs;
3442   c->nbs         = a->nbs;
3443 
3444   if (a->diag) {
3445     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3446       c->diag      = a->diag;
3447       c->free_diag = PETSC_FALSE;
3448     } else {
3449       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3450       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3451       for (i=0; i<mbs; i++) {
3452         c->diag[i] = a->diag[i];
3453       }
3454       c->free_diag = PETSC_TRUE;
3455     }
3456   } else c->diag        = 0;
3457   c->nz                 = a->nz;
3458   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3459   c->solve_work         = 0;
3460   c->mult_work          = 0;
3461 
3462   c->compressedrow.use     = a->compressedrow.use;
3463   c->compressedrow.nrows   = a->compressedrow.nrows;
3464   c->compressedrow.check   = a->compressedrow.check;
3465   if (a->compressedrow.use){
3466     i = a->compressedrow.nrows;
3467     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3468     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3469     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3470     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3471   } else {
3472     c->compressedrow.use    = PETSC_FALSE;
3473     c->compressedrow.i      = PETSC_NULL;
3474     c->compressedrow.rindex = PETSC_NULL;
3475   }
3476   C->same_nonzero = A->same_nonzero;
3477   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3478   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3479   PetscFunctionReturn(0);
3480 }
3481 
3482 #undef __FUNCT__
3483 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3484 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3485 {
3486     PetscErrorCode ierr;
3487 
3488   PetscFunctionBegin;
3489   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3490   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3491   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3492   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatLoad_SeqBAIJ"
3498 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3499 {
3500   Mat_SeqBAIJ    *a;
3501   PetscErrorCode ierr;
3502   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3503   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3504   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3505   PetscInt       *masked,nmask,tmp,bs2,ishift;
3506   PetscMPIInt    size;
3507   int            fd;
3508   PetscScalar    *aa;
3509   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3510 
3511   PetscFunctionBegin;
3512   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3513   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3514   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3515   bs2  = bs*bs;
3516 
3517   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3518   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3519   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3520   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3521   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3522   M = header[1]; N = header[2]; nz = header[3];
3523 
3524   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3525   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3526 
3527   /*
3528      This code adds extra rows to make sure the number of rows is
3529     divisible by the blocksize
3530   */
3531   mbs        = M/bs;
3532   extra_rows = bs - M + bs*(mbs);
3533   if (extra_rows == bs) extra_rows = 0;
3534   else                  mbs++;
3535   if (extra_rows) {
3536     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3537   }
3538 
3539   /* Set global sizes if not already set */
3540   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3541     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3542   } else { /* Check if the matrix global sizes are correct */
3543     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3544     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3545       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3546     }
3547     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3548   }
3549 
3550   /* read in row lengths */
3551   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3552   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3553   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3554 
3555   /* read in column indices */
3556   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3557   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3558   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3559 
3560   /* loop over row lengths determining block row lengths */
3561   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3562   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3563   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3564   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3565   rowcount = 0;
3566   nzcount = 0;
3567   for (i=0; i<mbs; i++) {
3568     nmask = 0;
3569     for (j=0; j<bs; j++) {
3570       kmax = rowlengths[rowcount];
3571       for (k=0; k<kmax; k++) {
3572         tmp = jj[nzcount++]/bs;
3573         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3574       }
3575       rowcount++;
3576     }
3577     browlengths[i] += nmask;
3578     /* zero out the mask elements we set */
3579     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3580   }
3581 
3582   /* Do preallocation  */
3583   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3584   a = (Mat_SeqBAIJ*)newmat->data;
3585 
3586   /* set matrix "i" values */
3587   a->i[0] = 0;
3588   for (i=1; i<= mbs; i++) {
3589     a->i[i]      = a->i[i-1] + browlengths[i-1];
3590     a->ilen[i-1] = browlengths[i-1];
3591   }
3592   a->nz         = 0;
3593   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3594 
3595   /* read in nonzero values */
3596   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3597   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3598   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3599 
3600   /* set "a" and "j" values into matrix */
3601   nzcount = 0; jcount = 0;
3602   for (i=0; i<mbs; i++) {
3603     nzcountb = nzcount;
3604     nmask    = 0;
3605     for (j=0; j<bs; j++) {
3606       kmax = rowlengths[i*bs+j];
3607       for (k=0; k<kmax; k++) {
3608         tmp = jj[nzcount++]/bs;
3609 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3610       }
3611     }
3612     /* sort the masked values */
3613     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3614 
3615     /* set "j" values into matrix */
3616     maskcount = 1;
3617     for (j=0; j<nmask; j++) {
3618       a->j[jcount++]  = masked[j];
3619       mask[masked[j]] = maskcount++;
3620     }
3621     /* set "a" values into matrix */
3622     ishift = bs2*a->i[i];
3623     for (j=0; j<bs; j++) {
3624       kmax = rowlengths[i*bs+j];
3625       for (k=0; k<kmax; k++) {
3626         tmp       = jj[nzcountb]/bs ;
3627         block     = mask[tmp] - 1;
3628         point     = jj[nzcountb] - bs*tmp;
3629         idx       = ishift + bs2*block + j + bs*point;
3630         a->a[idx] = (MatScalar)aa[nzcountb++];
3631       }
3632     }
3633     /* zero out the mask elements we set */
3634     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3635   }
3636   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3637 
3638   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3639   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3640   ierr = PetscFree(aa);CHKERRQ(ierr);
3641   ierr = PetscFree(jj);CHKERRQ(ierr);
3642   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3643 
3644   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3645   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3646   PetscFunctionReturn(0);
3647 }
3648 
3649 #undef __FUNCT__
3650 #define __FUNCT__ "MatCreateSeqBAIJ"
3651 /*@C
3652    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3653    compressed row) format.  For good matrix assembly performance the
3654    user should preallocate the matrix storage by setting the parameter nz
3655    (or the array nnz).  By setting these parameters accurately, performance
3656    during matrix assembly can be increased by more than a factor of 50.
3657 
3658    Collective on MPI_Comm
3659 
3660    Input Parameters:
3661 +  comm - MPI communicator, set to PETSC_COMM_SELF
3662 .  bs - size of block
3663 .  m - number of rows
3664 .  n - number of columns
3665 .  nz - number of nonzero blocks  per block row (same for all rows)
3666 -  nnz - array containing the number of nonzero blocks in the various block rows
3667          (possibly different for each block row) or PETSC_NULL
3668 
3669    Output Parameter:
3670 .  A - the matrix
3671 
3672    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3673    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3674    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3675 
3676    Options Database Keys:
3677 .   -mat_no_unroll - uses code that does not unroll the loops in the
3678                      block calculations (much slower)
3679 .    -mat_block_size - size of the blocks to use
3680 
3681    Level: intermediate
3682 
3683    Notes:
3684    The number of rows and columns must be divisible by blocksize.
3685 
3686    If the nnz parameter is given then the nz parameter is ignored
3687 
3688    A nonzero block is any block that as 1 or more nonzeros in it
3689 
3690    The block AIJ format is fully compatible with standard Fortran 77
3691    storage.  That is, the stored row and column indices can begin at
3692    either one (as in Fortran) or zero.  See the users' manual for details.
3693 
3694    Specify the preallocated storage with either nz or nnz (not both).
3695    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3696    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3697    matrices.
3698 
3699 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3700 @*/
3701 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3702 {
3703   PetscErrorCode ierr;
3704 
3705   PetscFunctionBegin;
3706   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3707   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3708   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3709   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3710   PetscFunctionReturn(0);
3711 }
3712 
3713 #undef __FUNCT__
3714 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3715 /*@C
3716    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3717    per row in the matrix. For good matrix assembly performance the
3718    user should preallocate the matrix storage by setting the parameter nz
3719    (or the array nnz).  By setting these parameters accurately, performance
3720    during matrix assembly can be increased by more than a factor of 50.
3721 
3722    Collective on MPI_Comm
3723 
3724    Input Parameters:
3725 +  A - the matrix
3726 .  bs - size of block
3727 .  nz - number of block nonzeros per block row (same for all rows)
3728 -  nnz - array containing the number of block nonzeros in the various block rows
3729          (possibly different for each block row) or PETSC_NULL
3730 
3731    Options Database Keys:
3732 .   -mat_no_unroll - uses code that does not unroll the loops in the
3733                      block calculations (much slower)
3734 .    -mat_block_size - size of the blocks to use
3735 
3736    Level: intermediate
3737 
3738    Notes:
3739    If the nnz parameter is given then the nz parameter is ignored
3740 
3741    You can call MatGetInfo() to get information on how effective the preallocation was;
3742    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3743    You can also run with the option -info and look for messages with the string
3744    malloc in them to see if additional memory allocation was needed.
3745 
3746    The block AIJ format is fully compatible with standard Fortran 77
3747    storage.  That is, the stored row and column indices can begin at
3748    either one (as in Fortran) or zero.  See the users' manual for details.
3749 
3750    Specify the preallocated storage with either nz or nnz (not both).
3751    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3752    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3753 
3754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3755 @*/
3756 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3757 {
3758   PetscErrorCode ierr;
3759 
3760   PetscFunctionBegin;
3761   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3762   PetscValidType(B,1);
3763   PetscValidLogicalCollectiveInt(B,bs,2);
3764   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3765   PetscFunctionReturn(0);
3766 }
3767 
3768 #undef __FUNCT__
3769 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3770 /*@C
3771    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3772    (the default sequential PETSc format).
3773 
3774    Collective on MPI_Comm
3775 
3776    Input Parameters:
3777 +  A - the matrix
3778 .  i - the indices into j for the start of each local row (starts with zero)
3779 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3780 -  v - optional values in the matrix
3781 
3782    Level: developer
3783 
3784 .keywords: matrix, aij, compressed row, sparse
3785 
3786 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3787 @*/
3788 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3789 {
3790   PetscErrorCode ierr;
3791 
3792   PetscFunctionBegin;
3793   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3794   PetscValidType(B,1);
3795   PetscValidLogicalCollectiveInt(B,bs,2);
3796   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3797   PetscFunctionReturn(0);
3798 }
3799 
3800 
3801 #undef __FUNCT__
3802 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3803 /*@
3804      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3805 
3806      Collective on MPI_Comm
3807 
3808    Input Parameters:
3809 +  comm - must be an MPI communicator of size 1
3810 .  bs - size of block
3811 .  m - number of rows
3812 .  n - number of columns
3813 .  i - row indices
3814 .  j - column indices
3815 -  a - matrix values
3816 
3817    Output Parameter:
3818 .  mat - the matrix
3819 
3820    Level: advanced
3821 
3822    Notes:
3823        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3824     once the matrix is destroyed
3825 
3826        You cannot set new nonzero locations into this matrix, that will generate an error.
3827 
3828        The i and j indices are 0 based
3829 
3830        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3831 
3832 
3833 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3834 
3835 @*/
3836 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3837 {
3838   PetscErrorCode ierr;
3839   PetscInt       ii;
3840   Mat_SeqBAIJ    *baij;
3841 
3842   PetscFunctionBegin;
3843   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3844   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3845 
3846   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3847   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3848   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3849   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3850   baij = (Mat_SeqBAIJ*)(*mat)->data;
3851   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3852   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3853 
3854   baij->i = i;
3855   baij->j = j;
3856   baij->a = a;
3857   baij->singlemalloc = PETSC_FALSE;
3858   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3859   baij->free_a       = PETSC_FALSE;
3860   baij->free_ij      = PETSC_FALSE;
3861 
3862   for (ii=0; ii<m; ii++) {
3863     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3864 #if defined(PETSC_USE_DEBUG)
3865     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3866 #endif
3867   }
3868 #if defined(PETSC_USE_DEBUG)
3869   for (ii=0; ii<baij->i[m]; ii++) {
3870     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3871     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3872   }
3873 #endif
3874 
3875   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3876   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3877   PetscFunctionReturn(0);
3878 }
3879