xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5edf65166e22df6feda2ed6bd8a6e348419dee17)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1305 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1310   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1311 
1312   PetscFunctionBegin;
1313   *nn = n;
1314   if (!ia) PetscFunctionReturn(0);
1315   if (symmetric) {
1316     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1317     nz   = tia[n];
1318   } else {
1319     tia = a->i; tja = a->j;
1320   }
1321 
1322   if (!blockcompressed && bs > 1) {
1323     (*nn) *= bs;
1324     /* malloc & create the natural set of indices */
1325     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1326     if (n) {
1327       (*ia)[0] = 0;
1328       for (j=1; j<bs; j++) {
1329         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1330       }
1331     }
1332 
1333     for (i=1; i<n; i++) {
1334       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1335       for (j=1; j<bs; j++) {
1336         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1337       }
1338     }
1339     if (n) {
1340       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1341     }
1342 
1343     if (inja) {
1344       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1345       cnt = 0;
1346       for (i=0; i<n; i++) {
1347         for (j=0; j<bs; j++) {
1348           for (k=tia[i]; k<tia[i+1]; k++) {
1349             for (l=0; l<bs; l++) {
1350               (*ja)[cnt++] = bs*tja[k] + l;
1351 	    }
1352           }
1353         }
1354       }
1355     }
1356 
1357     n     *= bs;
1358     nz *= bs*bs;
1359     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1360       ierr = PetscFree(tia);CHKERRQ(ierr);
1361       ierr = PetscFree(tja);CHKERRQ(ierr);
1362     }
1363   } else if (oshift == 1) {
1364     if (symmetric) {
1365       PetscInt nz = tia[A->rmap->n/bs];
1366       /*  add 1 to i and j indices */
1367       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1368       *ia = tia;
1369       if (ja) {
1370 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1371         *ja = tja;
1372       }
1373     } else {
1374       PetscInt nz = a->i[A->rmap->n/bs];
1375       /* malloc space and  add 1 to i and j indices */
1376       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1377       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1378       if (ja) {
1379 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1380 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1381       }
1382     }
1383   } else {
1384     *ia = tia;
1385     if (ja) *ja = tja;
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1393 {
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   if (!ia) PetscFunctionReturn(0);
1398   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1399     ierr = PetscFree(*ia);CHKERRQ(ierr);
1400     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1408 {
1409   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413 #if defined(PETSC_USE_LOG)
1414   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1415 #endif
1416   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1417   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1418   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1419   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1420   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1421   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1422   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1423   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1424   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1425   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1426   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1427   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1428   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1429 
1430   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1431   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1432   ierr = PetscFree(A->data);CHKERRQ(ierr);
1433 
1434   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1451 {
1452   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1453   PetscErrorCode ierr;
1454 
1455   PetscFunctionBegin;
1456   switch (op) {
1457   case MAT_ROW_ORIENTED:
1458     a->roworiented    = flg;
1459     break;
1460   case MAT_KEEP_NONZERO_PATTERN:
1461     a->keepnonzeropattern = flg;
1462     break;
1463   case MAT_NEW_NONZERO_LOCATIONS:
1464     a->nonew          = (flg ? 0 : 1);
1465     break;
1466   case MAT_NEW_NONZERO_LOCATION_ERR:
1467     a->nonew          = (flg ? -1 : 0);
1468     break;
1469   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1470     a->nonew          = (flg ? -2 : 0);
1471     break;
1472   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1473     a->nounused       = (flg ? -1 : 0);
1474     break;
1475   case MAT_CHECK_COMPRESSED_ROW:
1476     a->compressedrow.check = flg;
1477     break;
1478   case MAT_NEW_DIAGONALS:
1479   case MAT_IGNORE_OFF_PROC_ENTRIES:
1480   case MAT_USE_HASH_TABLE:
1481     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1482     break;
1483   case MAT_SPD:
1484   case MAT_SYMMETRIC:
1485   case MAT_STRUCTURALLY_SYMMETRIC:
1486   case MAT_HERMITIAN:
1487   case MAT_SYMMETRY_ETERNAL:
1488     /* These options are handled directly by MatSetOption() */
1489     break;
1490   default:
1491     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1499 {
1500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1501   PetscErrorCode ierr;
1502   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1503   MatScalar      *aa,*aa_i;
1504   PetscScalar    *v_i;
1505 
1506   PetscFunctionBegin;
1507   bs  = A->rmap->bs;
1508   ai  = a->i;
1509   aj  = a->j;
1510   aa  = a->a;
1511   bs2 = a->bs2;
1512 
1513   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1514 
1515   bn  = row/bs;   /* Block number */
1516   bp  = row % bs; /* Block Position */
1517   M   = ai[bn+1] - ai[bn];
1518   *nz = bs*M;
1519 
1520   if (v) {
1521     *v = 0;
1522     if (*nz) {
1523       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1524       for (i=0; i<M; i++) { /* for each block in the block row */
1525         v_i  = *v + i*bs;
1526         aa_i = aa + bs2*(ai[bn] + i);
1527         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1528       }
1529     }
1530   }
1531 
1532   if (idx) {
1533     *idx = 0;
1534     if (*nz) {
1535       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1536       for (i=0; i<M; i++) { /* for each block in the block row */
1537         idx_i = *idx + i*bs;
1538         itmp  = bs*aj[ai[bn] + i];
1539         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1540       }
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1554   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1563 {
1564   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1565   Mat            C;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1568   PetscInt       *rows,*cols,bs2=a->bs2;
1569   MatScalar      *array;
1570 
1571   PetscFunctionBegin;
1572   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1573   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1574     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1575     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1576 
1577     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1578     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1579     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1580     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1581     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1582     ierr = PetscFree(col);CHKERRQ(ierr);
1583   } else {
1584     C = *B;
1585   }
1586 
1587   array = a->a;
1588   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1589   for (i=0; i<mbs; i++) {
1590     cols[0] = i*bs;
1591     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1592     len = ai[i+1] - ai[i];
1593     for (j=0; j<len; j++) {
1594       rows[0] = (*aj++)*bs;
1595       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1596       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1597       array += bs2;
1598     }
1599   }
1600   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1601 
1602   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1604 
1605   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1606     *B = C;
1607   } else {
1608     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 EXTERN_C_BEGIN
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1617 {
1618   PetscErrorCode ierr;
1619   Mat            Btrans;
1620 
1621   PetscFunctionBegin;
1622   *f = PETSC_FALSE;
1623   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1624   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1625   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1633 {
1634   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1635   PetscErrorCode ierr;
1636   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1637   int            fd;
1638   PetscScalar    *aa;
1639   FILE           *file;
1640 
1641   PetscFunctionBegin;
1642   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1643   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1644   col_lens[0] = MAT_FILE_CLASSID;
1645 
1646   col_lens[1] = A->rmap->N;
1647   col_lens[2] = A->cmap->n;
1648   col_lens[3] = a->nz*bs2;
1649 
1650   /* store lengths of each row and write (including header) to file */
1651   count = 0;
1652   for (i=0; i<a->mbs; i++) {
1653     for (j=0; j<bs; j++) {
1654       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1655     }
1656   }
1657   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1658   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1659 
1660   /* store column indices (zero start index) */
1661   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       for (k=a->i[i]; k<a->i[i+1]; k++) {
1666         for (l=0; l<bs; l++) {
1667           jj[count++] = bs*a->j[k] + l;
1668         }
1669       }
1670     }
1671   }
1672   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1673   ierr = PetscFree(jj);CHKERRQ(ierr);
1674 
1675   /* store nonzero values */
1676   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1677   count = 0;
1678   for (i=0; i<a->mbs; i++) {
1679     for (j=0; j<bs; j++) {
1680       for (k=a->i[i]; k<a->i[i+1]; k++) {
1681         for (l=0; l<bs; l++) {
1682           aa[count++] = a->a[bs2*k + l*bs + j];
1683         }
1684       }
1685     }
1686   }
1687   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1688   ierr = PetscFree(aa);CHKERRQ(ierr);
1689 
1690   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1691   if (file) {
1692     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1700 {
1701   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1702   PetscErrorCode    ierr;
1703   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1704   PetscViewerFormat format;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1708   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1709     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1710   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1711     Mat aij;
1712     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1713     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1714     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1715   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1716      PetscFunctionReturn(0);
1717   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1718     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1719     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1720     for (i=0; i<a->mbs; i++) {
1721       for (j=0; j<bs; j++) {
1722         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1723         for (k=a->i[i]; k<a->i[i+1]; k++) {
1724           for (l=0; l<bs; l++) {
1725 #if defined(PETSC_USE_COMPLEX)
1726             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1727               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1728                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1729             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1731                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             }
1735 #else
1736             if (a->a[bs2*k + l*bs + j] != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1738             }
1739 #endif
1740           }
1741         }
1742         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1743       }
1744     }
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1748     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1749     for (i=0; i<a->mbs; i++) {
1750       for (j=0; j<bs; j++) {
1751         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1752         for (k=a->i[i]; k<a->i[i+1]; k++) {
1753           for (l=0; l<bs; l++) {
1754 #if defined(PETSC_USE_COMPLEX)
1755             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1756               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1757                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1758             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1760                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             } else {
1762               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             }
1764 #else
1765             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1766 #endif
1767           }
1768         }
1769         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1770       }
1771     }
1772     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1773   }
1774   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1781 {
1782   Mat            A = (Mat) Aa;
1783   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1784   PetscErrorCode ierr;
1785   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1786   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1787   MatScalar      *aa;
1788   PetscViewer    viewer;
1789   PetscViewerFormat format;
1790 
1791   PetscFunctionBegin;
1792   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1793   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1794 
1795   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1796 
1797   /* loop over matrix elements drawing boxes */
1798 
1799   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1800     color = PETSC_DRAW_BLUE;
1801     for (i=0,row=0; i<mbs; i++,row+=bs) {
1802       for (j=a->i[i]; j<a->i[i+1]; j++) {
1803         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1804         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1805         aa = a->a + j*bs2;
1806         for (k=0; k<bs; k++) {
1807           for (l=0; l<bs; l++) {
1808             if (PetscRealPart(*aa++) >=  0.) continue;
1809             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1810           }
1811         }
1812       }
1813     }
1814     color = PETSC_DRAW_CYAN;
1815     for (i=0,row=0; i<mbs; i++,row+=bs) {
1816       for (j=a->i[i]; j<a->i[i+1]; j++) {
1817         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1818         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1819         aa = a->a + j*bs2;
1820         for (k=0; k<bs; k++) {
1821           for (l=0; l<bs; l++) {
1822             if (PetscRealPart(*aa++) != 0.) continue;
1823             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1824           }
1825         }
1826       }
1827     }
1828     color = PETSC_DRAW_RED;
1829     for (i=0,row=0; i<mbs; i++,row+=bs) {
1830       for (j=a->i[i]; j<a->i[i+1]; j++) {
1831         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1832         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1833         aa = a->a + j*bs2;
1834         for (k=0; k<bs; k++) {
1835           for (l=0; l<bs; l++) {
1836             if (PetscRealPart(*aa++) <= 0.) continue;
1837             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1838           }
1839         }
1840       }
1841     }
1842   } else {
1843     /* use contour shading to indicate magnitude of values */
1844     /* first determine max of all nonzero values */
1845     PetscDraw   popup;
1846     PetscReal scale,maxv = 0.0;
1847 
1848     for (i=0; i<a->nz*a->bs2; i++) {
1849       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1850     }
1851     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1852     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1853     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1854     for (i=0,row=0; i<mbs; i++,row+=bs) {
1855       for (j=a->i[i]; j<a->i[i+1]; j++) {
1856         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1857         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1858         aa = a->a + j*bs2;
1859         for (k=0; k<bs; k++) {
1860           for (l=0; l<bs; l++) {
1861             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1862             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1863           }
1864         }
1865       }
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1874 {
1875   PetscErrorCode ierr;
1876   PetscReal      xl,yl,xr,yr,w,h;
1877   PetscDraw      draw;
1878   PetscBool      isnull;
1879 
1880   PetscFunctionBegin;
1881 
1882   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1883   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1884 
1885   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1886   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1887   xr += w;    yr += h;  xl = -w;     yl = -h;
1888   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1889   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1890   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatView_SeqBAIJ"
1896 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1897 {
1898   PetscErrorCode ierr;
1899   PetscBool      iascii,isbinary,isdraw;
1900 
1901   PetscFunctionBegin;
1902   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1903   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1904   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1905   if (iascii){
1906     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1907   } else if (isbinary) {
1908     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1909   } else if (isdraw) {
1910     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1911   } else {
1912     Mat B;
1913     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1914     ierr = MatView(B,viewer);CHKERRQ(ierr);
1915     ierr = MatDestroy(&B);CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1923 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1924 {
1925   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1926   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1927   PetscInt    *ai = a->i,*ailen = a->ilen;
1928   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1929   MatScalar   *ap,*aa = a->a;
1930 
1931   PetscFunctionBegin;
1932   for (k=0; k<m; k++) { /* loop over rows */
1933     row  = im[k]; brow = row/bs;
1934     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1935     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1936     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1937     nrow = ailen[brow];
1938     for (l=0; l<n; l++) { /* loop over columns */
1939       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1940       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1941       col  = in[l] ;
1942       bcol = col/bs;
1943       cidx = col%bs;
1944       ridx = row%bs;
1945       high = nrow;
1946       low  = 0; /* assume unsorted */
1947       while (high-low > 5) {
1948         t = (low+high)/2;
1949         if (rp[t] > bcol) high = t;
1950         else             low  = t;
1951       }
1952       for (i=low; i<high; i++) {
1953         if (rp[i] > bcol) break;
1954         if (rp[i] == bcol) {
1955           *v++ = ap[bs2*i+bs*cidx+ridx];
1956           goto finished;
1957         }
1958       }
1959       *v++ = 0.0;
1960       finished:;
1961     }
1962   }
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1968 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1969 {
1970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1972   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1973   PetscErrorCode    ierr;
1974   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1975   PetscBool         roworiented=a->roworiented;
1976   const PetscScalar *value = v;
1977   MatScalar         *ap,*aa = a->a,*bap;
1978 
1979   PetscFunctionBegin;
1980   if (roworiented) {
1981     stepval = (n-1)*bs;
1982   } else {
1983     stepval = (m-1)*bs;
1984   }
1985   for (k=0; k<m; k++) { /* loop over added rows */
1986     row  = im[k];
1987     if (row < 0) continue;
1988 #if defined(PETSC_USE_DEBUG)
1989     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1990 #endif
1991     rp   = aj + ai[row];
1992     ap   = aa + bs2*ai[row];
1993     rmax = imax[row];
1994     nrow = ailen[row];
1995     low  = 0;
1996     high = nrow;
1997     for (l=0; l<n; l++) { /* loop over added columns */
1998       if (in[l] < 0) continue;
1999 #if defined(PETSC_USE_DEBUG)
2000       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2001 #endif
2002       col = in[l];
2003       if (roworiented) {
2004         value = v + (k*(stepval+bs) + l)*bs;
2005       } else {
2006         value = v + (l*(stepval+bs) + k)*bs;
2007       }
2008       if (col <= lastcol) low = 0; else high = nrow;
2009       lastcol = col;
2010       while (high-low > 7) {
2011         t = (low+high)/2;
2012         if (rp[t] > col) high = t;
2013         else             low  = t;
2014       }
2015       for (i=low; i<high; i++) {
2016         if (rp[i] > col) break;
2017         if (rp[i] == col) {
2018           bap  = ap +  bs2*i;
2019           if (roworiented) {
2020             if (is == ADD_VALUES) {
2021               for (ii=0; ii<bs; ii++,value+=stepval) {
2022                 for (jj=ii; jj<bs2; jj+=bs) {
2023                   bap[jj] += *value++;
2024                 }
2025               }
2026             } else {
2027               for (ii=0; ii<bs; ii++,value+=stepval) {
2028                 for (jj=ii; jj<bs2; jj+=bs) {
2029                   bap[jj] = *value++;
2030                 }
2031               }
2032             }
2033           } else {
2034             if (is == ADD_VALUES) {
2035               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2036                 for (jj=0; jj<bs; jj++) {
2037                   bap[jj] += value[jj];
2038                 }
2039                 bap += bs;
2040               }
2041             } else {
2042               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2043                 for (jj=0; jj<bs; jj++) {
2044                   bap[jj]  = value[jj];
2045                 }
2046                 bap += bs;
2047               }
2048             }
2049           }
2050           goto noinsert2;
2051         }
2052       }
2053       if (nonew == 1) goto noinsert2;
2054       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2055       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2056       N = nrow++ - 1; high++;
2057       /* shift up all the later entries in this row */
2058       for (ii=N; ii>=i; ii--) {
2059         rp[ii+1] = rp[ii];
2060         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2061       }
2062       if (N >= i) {
2063         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2064       }
2065       rp[i] = col;
2066       bap   = ap +  bs2*i;
2067       if (roworiented) {
2068         for (ii=0; ii<bs; ii++,value+=stepval) {
2069           for (jj=ii; jj<bs2; jj+=bs) {
2070             bap[jj] = *value++;
2071           }
2072         }
2073       } else {
2074         for (ii=0; ii<bs; ii++,value+=stepval) {
2075           for (jj=0; jj<bs; jj++) {
2076             *bap++  = *value++;
2077           }
2078         }
2079       }
2080       noinsert2:;
2081       low = i;
2082     }
2083     ailen[row] = nrow;
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2090 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2091 {
2092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2093   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2094   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2095   PetscErrorCode ierr;
2096   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2097   MatScalar      *aa = a->a,*ap;
2098   PetscReal      ratio=0.6;
2099 
2100   PetscFunctionBegin;
2101   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2102 
2103   if (m) rmax = ailen[0];
2104   for (i=1; i<mbs; i++) {
2105     /* move each row back by the amount of empty slots (fshift) before it*/
2106     fshift += imax[i-1] - ailen[i-1];
2107     rmax   = PetscMax(rmax,ailen[i]);
2108     if (fshift) {
2109       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2110       N = ailen[i];
2111       for (j=0; j<N; j++) {
2112         ip[j-fshift] = ip[j];
2113         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2114       }
2115     }
2116     ai[i] = ai[i-1] + ailen[i-1];
2117   }
2118   if (mbs) {
2119     fshift += imax[mbs-1] - ailen[mbs-1];
2120     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2121   }
2122   /* reset ilen and imax for each row */
2123   for (i=0; i<mbs; i++) {
2124     ailen[i] = imax[i] = ai[i+1] - ai[i];
2125   }
2126   a->nz = ai[mbs];
2127 
2128   /* diagonals may have moved, so kill the diagonal pointers */
2129   a->idiagvalid = PETSC_FALSE;
2130   if (fshift && a->diag) {
2131     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2132     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2133     a->diag = 0;
2134   }
2135   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2136   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2137   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2138   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2139   A->info.mallocs     += a->reallocs;
2140   a->reallocs          = 0;
2141   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2142 
2143   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2144   A->same_nonzero = PETSC_TRUE;
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 /*
2149    This function returns an array of flags which indicate the locations of contiguous
2150    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2151    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2152    Assume: sizes should be long enough to hold all the values.
2153 */
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2156 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2157 {
2158   PetscInt   i,j,k,row;
2159   PetscBool  flg;
2160 
2161   PetscFunctionBegin;
2162   for (i=0,j=0; i<n; j++) {
2163     row = idx[i];
2164     if (row%bs!=0) { /* Not the begining of a block */
2165       sizes[j] = 1;
2166       i++;
2167     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2168       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2169       i++;
2170     } else { /* Begining of the block, so check if the complete block exists */
2171       flg = PETSC_TRUE;
2172       for (k=1; k<bs; k++) {
2173         if (row+k != idx[i+k]) { /* break in the block */
2174           flg = PETSC_FALSE;
2175           break;
2176         }
2177       }
2178       if (flg) { /* No break in the bs */
2179         sizes[j] = bs;
2180         i+= bs;
2181       } else {
2182         sizes[j] = 1;
2183         i++;
2184       }
2185     }
2186   }
2187   *bs_max = j;
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2193 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2194 {
2195   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2196   PetscErrorCode    ierr;
2197   PetscInt          i,j,k,count,*rows;
2198   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2199   PetscScalar       zero = 0.0;
2200   MatScalar         *aa;
2201   const PetscScalar *xx;
2202   PetscScalar       *bb;
2203 
2204   PetscFunctionBegin;
2205   /* fix right hand side if needed */
2206   if (x && b) {
2207     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2208     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2209     for (i=0; i<is_n; i++) {
2210       bb[is_idx[i]] = diag*xx[is_idx[i]];
2211     }
2212     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2213     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2214   }
2215 
2216   /* Make a copy of the IS and  sort it */
2217   /* allocate memory for rows,sizes */
2218   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2219 
2220   /* copy IS values to rows, and sort them */
2221   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2222   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2223 
2224   if (baij->keepnonzeropattern) {
2225     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2226     bs_max = is_n;
2227     A->same_nonzero = PETSC_TRUE;
2228   } else {
2229     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2230     A->same_nonzero = PETSC_FALSE;
2231   }
2232 
2233   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2234     row   = rows[j];
2235     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2236     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2237     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2238     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2239       if (diag != (PetscScalar)0.0) {
2240         if (baij->ilen[row/bs] > 0) {
2241           baij->ilen[row/bs]       = 1;
2242           baij->j[baij->i[row/bs]] = row/bs;
2243           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2244         }
2245         /* Now insert all the diagonal values for this bs */
2246         for (k=0; k<bs; k++) {
2247           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2248         }
2249       } else { /* (diag == 0.0) */
2250         baij->ilen[row/bs] = 0;
2251       } /* end (diag == 0.0) */
2252     } else { /* (sizes[i] != bs) */
2253 #if defined (PETSC_USE_DEBUG)
2254       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2255 #endif
2256       for (k=0; k<count; k++) {
2257         aa[0] =  zero;
2258         aa    += bs;
2259       }
2260       if (diag != (PetscScalar)0.0) {
2261         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2262       }
2263     }
2264   }
2265 
2266   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2267   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2273 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2274 {
2275   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2276   PetscErrorCode    ierr;
2277   PetscInt          i,j,k,count;
2278   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2279   PetscScalar       zero = 0.0;
2280   MatScalar         *aa;
2281   const PetscScalar *xx;
2282   PetscScalar       *bb;
2283   PetscBool         *zeroed,vecs = PETSC_FALSE;
2284 
2285   PetscFunctionBegin;
2286   /* fix right hand side if needed */
2287   if (x && b) {
2288     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2289     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2290     vecs = PETSC_TRUE;
2291   }
2292   A->same_nonzero = PETSC_TRUE;
2293 
2294   /* zero the columns */
2295   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2296   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2297   for (i=0; i<is_n; i++) {
2298     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2299     zeroed[is_idx[i]] = PETSC_TRUE;
2300   }
2301   for (i=0; i<A->rmap->N; i++) {
2302     if (!zeroed[i]) {
2303       row = i/bs;
2304       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2305         for (k=0; k<bs; k++) {
2306           col = bs*baij->j[j] + k;
2307 	  if (zeroed[col]) {
2308 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2309             if (vecs) bb[i] -= aa[0]*xx[col];
2310             aa[0] = 0.0;
2311           }
2312         }
2313       }
2314     } else if (vecs) bb[i] = diag*xx[i];
2315   }
2316   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2317   if (vecs) {
2318     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2319     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2320   }
2321 
2322   /* zero the rows */
2323   for (i=0; i<is_n; i++) {
2324     row   = is_idx[i];
2325     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2326     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2327     for (k=0; k<count; k++) {
2328       aa[0] =  zero;
2329       aa    += bs;
2330     }
2331     if (diag != (PetscScalar)0.0) {
2332       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2333     }
2334   }
2335   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2341 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2342 {
2343   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2344   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2345   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2346   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2347   PetscErrorCode ierr;
2348   PetscInt       ridx,cidx,bs2=a->bs2;
2349   PetscBool      roworiented=a->roworiented;
2350   MatScalar      *ap,value,*aa=a->a,*bap;
2351 
2352   PetscFunctionBegin;
2353   if (v) PetscValidScalarPointer(v,6);
2354   for (k=0; k<m; k++) { /* loop over added rows */
2355     row  = im[k];
2356     brow = row/bs;
2357     if (row < 0) continue;
2358 #if defined(PETSC_USE_DEBUG)
2359     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2360 #endif
2361     rp   = aj + ai[brow];
2362     ap   = aa + bs2*ai[brow];
2363     rmax = imax[brow];
2364     nrow = ailen[brow];
2365     low  = 0;
2366     high = nrow;
2367     for (l=0; l<n; l++) { /* loop over added columns */
2368       if (in[l] < 0) continue;
2369 #if defined(PETSC_USE_DEBUG)
2370       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2371 #endif
2372       col = in[l]; bcol = col/bs;
2373       ridx = row % bs; cidx = col % bs;
2374       if (roworiented) {
2375         value = v[l + k*n];
2376       } else {
2377         value = v[k + l*m];
2378       }
2379       if (col <= lastcol) low = 0; else high = nrow;
2380       lastcol = col;
2381       while (high-low > 7) {
2382         t = (low+high)/2;
2383         if (rp[t] > bcol) high = t;
2384         else              low  = t;
2385       }
2386       for (i=low; i<high; i++) {
2387         if (rp[i] > bcol) break;
2388         if (rp[i] == bcol) {
2389           bap  = ap +  bs2*i + bs*cidx + ridx;
2390           if (is == ADD_VALUES) *bap += value;
2391           else                  *bap  = value;
2392           goto noinsert1;
2393         }
2394       }
2395       if (nonew == 1) goto noinsert1;
2396       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2397       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2398       N = nrow++ - 1; high++;
2399       /* shift up all the later entries in this row */
2400       for (ii=N; ii>=i; ii--) {
2401         rp[ii+1] = rp[ii];
2402         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2403       }
2404       if (N>=i) {
2405         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2406       }
2407       rp[i]                      = bcol;
2408       ap[bs2*i + bs*cidx + ridx] = value;
2409       a->nz++;
2410       noinsert1:;
2411       low = i;
2412     }
2413     ailen[brow] = nrow;
2414   }
2415   A->same_nonzero = PETSC_FALSE;
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2421 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2422 {
2423   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2424   Mat            outA;
2425   PetscErrorCode ierr;
2426   PetscBool      row_identity,col_identity;
2427 
2428   PetscFunctionBegin;
2429   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2430   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2431   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2432   if (!row_identity || !col_identity) {
2433     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2434   }
2435 
2436   outA            = inA;
2437   inA->factortype = MAT_FACTOR_LU;
2438 
2439   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2440 
2441   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2442   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2443   a->row = row;
2444   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2445   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2446   a->col = col;
2447 
2448   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2449   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2450    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2451   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2452 
2453   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2454   if (!a->solve_work) {
2455     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2456     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2457   }
2458   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2459 
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 EXTERN_C_BEGIN
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2466 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2467 {
2468   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2469   PetscInt    i,nz,mbs;
2470 
2471   PetscFunctionBegin;
2472   nz  = baij->maxnz;
2473   mbs = baij->mbs;
2474   for (i=0; i<nz; i++) {
2475     baij->j[i] = indices[i];
2476   }
2477   baij->nz = nz;
2478   for (i=0; i<mbs; i++) {
2479     baij->ilen[i] = baij->imax[i];
2480   }
2481   PetscFunctionReturn(0);
2482 }
2483 EXTERN_C_END
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2487 /*@
2488     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2489        in the matrix.
2490 
2491   Input Parameters:
2492 +  mat - the SeqBAIJ matrix
2493 -  indices - the column indices
2494 
2495   Level: advanced
2496 
2497   Notes:
2498     This can be called if you have precomputed the nonzero structure of the
2499   matrix and want to provide it to the matrix object to improve the performance
2500   of the MatSetValues() operation.
2501 
2502     You MUST have set the correct numbers of nonzeros per row in the call to
2503   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2504 
2505     MUST be called before any calls to MatSetValues();
2506 
2507 @*/
2508 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2509 {
2510   PetscErrorCode ierr;
2511 
2512   PetscFunctionBegin;
2513   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2514   PetscValidPointer(indices,2);
2515   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2521 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2522 {
2523   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2524   PetscErrorCode ierr;
2525   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2526   PetscReal      atmp;
2527   PetscScalar    *x,zero = 0.0;
2528   MatScalar      *aa;
2529   PetscInt       ncols,brow,krow,kcol;
2530 
2531   PetscFunctionBegin;
2532   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2533   bs   = A->rmap->bs;
2534   aa   = a->a;
2535   ai   = a->i;
2536   aj   = a->j;
2537   mbs  = a->mbs;
2538 
2539   ierr = VecSet(v,zero);CHKERRQ(ierr);
2540   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2541   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2542   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2543   for (i=0; i<mbs; i++) {
2544     ncols = ai[1] - ai[0]; ai++;
2545     brow  = bs*i;
2546     for (j=0; j<ncols; j++){
2547       for (kcol=0; kcol<bs; kcol++){
2548         for (krow=0; krow<bs; krow++){
2549           atmp = PetscAbsScalar(*aa);aa++;
2550           row = brow + krow;    /* row index */
2551           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2552           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2553         }
2554       }
2555       aj++;
2556     }
2557   }
2558   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatCopy_SeqBAIJ"
2564 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2565 {
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   /* If the two matrices have the same copy implementation, use fast copy. */
2570   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2571     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2572     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2573     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2574 
2575     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2576     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2577     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2578   } else {
2579     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2580   }
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2586 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2587 {
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2597 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2598 {
2599   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2600   PetscFunctionBegin;
2601   *array = a->a;
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2608 {
2609   PetscFunctionBegin;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2616 {
2617   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2618   PetscErrorCode ierr;
2619   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2620   PetscBLASInt   one=1;
2621 
2622   PetscFunctionBegin;
2623   if (str == SAME_NONZERO_PATTERN) {
2624     PetscScalar alpha = a;
2625     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2626     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2627   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2628     if (y->xtoy && y->XtoY != X) {
2629       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2630       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2631     }
2632     if (!y->xtoy) { /* get xtoy */
2633       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2634       y->XtoY = X;
2635       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2636     }
2637     for (i=0; i<x->nz; i++) {
2638       j = 0;
2639       while (j < bs2){
2640         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2641         j++;
2642       }
2643     }
2644     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2645   } else {
2646     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2647   }
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2654 {
2655   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2656   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2657   MatScalar      *aa = a->a;
2658 
2659   PetscFunctionBegin;
2660   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2667 {
2668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2669   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2670   MatScalar      *aa = a->a;
2671 
2672   PetscFunctionBegin;
2673   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2681 /*
2682     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2683 */
2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2685 {
2686   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2687   PetscErrorCode ierr;
2688   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2689   PetscInt       nz = a->i[m],row,*jj,mr,col;
2690 
2691   PetscFunctionBegin;
2692   *nn = n;
2693   if (!ia) PetscFunctionReturn(0);
2694   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2695   else {
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2697     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2698     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2699     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2700     jj = a->j;
2701     for (i=0; i<nz; i++) {
2702       collengths[jj[i]]++;
2703     }
2704     cia[0] = oshift;
2705     for (i=0; i<n; i++) {
2706       cia[i+1] = cia[i] + collengths[i];
2707     }
2708     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2709     jj   = a->j;
2710     for (row=0; row<m; row++) {
2711       mr = a->i[row+1] - a->i[row];
2712       for (i=0; i<mr; i++) {
2713         col = *jj++;
2714         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2715       }
2716     }
2717     ierr = PetscFree(collengths);CHKERRQ(ierr);
2718     *ia = cia; *ja = cja;
2719   }
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2726 {
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   if (!ia) PetscFunctionReturn(0);
2731   ierr = PetscFree(*ia);CHKERRQ(ierr);
2732   ierr = PetscFree(*ja);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2738 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2739 {
2740   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2741   PetscErrorCode ierr;
2742   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2743   PetscScalar    dx,*y,*xx,*w3_array;
2744   PetscScalar    *vscale_array;
2745   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2746   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2747   void           *fctx = coloring->fctx;
2748   PetscBool      flg = PETSC_FALSE;
2749   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2750   Vec            x1_tmp;
2751 
2752   PetscFunctionBegin;
2753   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2754   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2755   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2756   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2757 
2758   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2759   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2760   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2761   if (flg) {
2762     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2763   } else {
2764     PetscBool  assembled;
2765     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2766     if (assembled) {
2767       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2768     }
2769   }
2770 
2771   x1_tmp = x1;
2772   if (!coloring->vscale){
2773     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2774   }
2775 
2776   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2777     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2778   }
2779   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2780 
2781   /* Set w1 = F(x1) */
2782   if (!coloring->fset) {
2783     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2784     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2785     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2786   } else {
2787     coloring->fset = PETSC_FALSE;
2788   }
2789 
2790   if (!coloring->w3) {
2791     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2793   }
2794   w3 = coloring->w3;
2795 
2796     CHKMEMQ;
2797     /* Compute all the local scale factors, including ghost points */
2798   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2799   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2800   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2801   if (ctype == IS_COLORING_GHOSTED){
2802     col_start = 0; col_end = N;
2803   } else if (ctype == IS_COLORING_GLOBAL){
2804     xx = xx - start;
2805     vscale_array = vscale_array - start;
2806     col_start = start; col_end = N + start;
2807   }    CHKMEMQ;
2808   for (col=col_start; col<col_end; col++){
2809     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2810     if (coloring->htype[0] == 'w') {
2811       dx = 1.0 + unorm;
2812     } else {
2813       dx  = xx[col];
2814     }
2815     if (dx == (PetscScalar)0.0) dx = 1.0;
2816 #if !defined(PETSC_USE_COMPLEX)
2817     if (dx < umin && dx >= 0.0)      dx = umin;
2818     else if (dx < 0.0 && dx > -umin) dx = -umin;
2819 #else
2820     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2821     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2822 #endif
2823     dx               *= epsilon;
2824     vscale_array[col] = (PetscScalar)1.0/dx;
2825   }     CHKMEMQ;
2826   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2827   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2828   if (ctype == IS_COLORING_GLOBAL){
2829     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2830     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2831   }
2832   CHKMEMQ;
2833   if (coloring->vscaleforrow) {
2834     vscaleforrow = coloring->vscaleforrow;
2835   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2836 
2837   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2838   /*
2839     Loop over each color
2840   */
2841   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2842   for (k=0; k<coloring->ncolors; k++) {
2843     coloring->currentcolor = k;
2844     for (i=0; i<bs; i++) {
2845       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2846       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2847       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2848       /*
2849 	Loop over each column associated with color
2850 	adding the perturbation to the vector w3.
2851       */
2852       for (l=0; l<coloring->ncolumns[k]; l++) {
2853 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2854 	if (coloring->htype[0] == 'w') {
2855 	  dx = 1.0 + unorm;
2856 	} else {
2857 	  dx  = xx[col];
2858 	}
2859 	if (dx == (PetscScalar)0.0) dx = 1.0;
2860 #if !defined(PETSC_USE_COMPLEX)
2861 	if (dx < umin && dx >= 0.0)      dx = umin;
2862 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2863 #else
2864 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2865 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2866 #endif
2867 	dx            *= epsilon;
2868 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2869 	w3_array[col] += dx;
2870       }
2871       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2872       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2873 
2874       /*
2875 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2876 	w2 = F(x1 + dx) - F(x1)
2877       */
2878       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2879       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2880       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2881       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2882 
2883       /*
2884 	Loop over rows of vector, putting results into Jacobian matrix
2885       */
2886       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2887       for (l=0; l<coloring->nrows[k]; l++) {
2888 	row    = bs*coloring->rows[k][l];             /* local row index */
2889 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2890         for (j=0; j<bs; j++) {
2891   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2892           srows[j]  = row + start + j;
2893         }
2894 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2895       }
2896       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2897     }
2898   } /* endof for each color */
2899   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2900   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2901   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2902   ierr = PetscFree(srows);CHKERRQ(ierr);
2903 
2904   coloring->currentcolor = -1;
2905   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2906   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2907   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2908   PetscFunctionReturn(0);
2909 }
2910 
2911 /* -------------------------------------------------------------------*/
2912 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2913        MatGetRow_SeqBAIJ,
2914        MatRestoreRow_SeqBAIJ,
2915        MatMult_SeqBAIJ_N,
2916 /* 4*/ MatMultAdd_SeqBAIJ_N,
2917        MatMultTranspose_SeqBAIJ,
2918        MatMultTransposeAdd_SeqBAIJ,
2919        0,
2920        0,
2921        0,
2922 /*10*/ 0,
2923        MatLUFactor_SeqBAIJ,
2924        0,
2925        0,
2926        MatTranspose_SeqBAIJ,
2927 /*15*/ MatGetInfo_SeqBAIJ,
2928        MatEqual_SeqBAIJ,
2929        MatGetDiagonal_SeqBAIJ,
2930        MatDiagonalScale_SeqBAIJ,
2931        MatNorm_SeqBAIJ,
2932 /*20*/ 0,
2933        MatAssemblyEnd_SeqBAIJ,
2934        MatSetOption_SeqBAIJ,
2935        MatZeroEntries_SeqBAIJ,
2936 /*24*/ MatZeroRows_SeqBAIJ,
2937        0,
2938        0,
2939        0,
2940        0,
2941 /*29*/ MatSetUp_SeqBAIJ,
2942        0,
2943        0,
2944        0,
2945        0,
2946 /*34*/ MatDuplicate_SeqBAIJ,
2947        0,
2948        0,
2949        MatILUFactor_SeqBAIJ,
2950        0,
2951 /*39*/ MatAXPY_SeqBAIJ,
2952        MatGetSubMatrices_SeqBAIJ,
2953        MatIncreaseOverlap_SeqBAIJ,
2954        MatGetValues_SeqBAIJ,
2955        MatCopy_SeqBAIJ,
2956 /*44*/ 0,
2957        MatScale_SeqBAIJ,
2958        0,
2959        0,
2960        MatZeroRowsColumns_SeqBAIJ,
2961 /*49*/ 0,
2962        MatGetRowIJ_SeqBAIJ,
2963        MatRestoreRowIJ_SeqBAIJ,
2964        MatGetColumnIJ_SeqBAIJ,
2965        MatRestoreColumnIJ_SeqBAIJ,
2966 /*54*/ MatFDColoringCreate_SeqAIJ,
2967        0,
2968        0,
2969        0,
2970        MatSetValuesBlocked_SeqBAIJ,
2971 /*59*/ MatGetSubMatrix_SeqBAIJ,
2972        MatDestroy_SeqBAIJ,
2973        MatView_SeqBAIJ,
2974        0,
2975        0,
2976 /*64*/ 0,
2977        0,
2978        0,
2979        0,
2980        0,
2981 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2982        0,
2983        MatConvert_Basic,
2984        0,
2985        0,
2986 /*74*/ 0,
2987        MatFDColoringApply_BAIJ,
2988        0,
2989        0,
2990        0,
2991 /*79*/ 0,
2992        0,
2993        0,
2994        0,
2995        MatLoad_SeqBAIJ,
2996 /*84*/ 0,
2997        0,
2998        0,
2999        0,
3000        0,
3001 /*89*/ 0,
3002        0,
3003        0,
3004        0,
3005        0,
3006 /*94*/ 0,
3007        0,
3008        0,
3009        0,
3010        0,
3011 /*99*/0,
3012        0,
3013        0,
3014        0,
3015        0,
3016 /*104*/0,
3017        MatRealPart_SeqBAIJ,
3018        MatImaginaryPart_SeqBAIJ,
3019        0,
3020        0,
3021 /*109*/0,
3022        0,
3023        0,
3024        0,
3025        MatMissingDiagonal_SeqBAIJ,
3026 /*114*/0,
3027        0,
3028        0,
3029        0,
3030        0,
3031 /*119*/0,
3032        0,
3033        MatMultHermitianTranspose_SeqBAIJ,
3034        MatMultHermitianTransposeAdd_SeqBAIJ,
3035        0,
3036 /*124*/0,
3037        0,
3038        MatInvertBlockDiagonal_SeqBAIJ
3039 };
3040 
3041 EXTERN_C_BEGIN
3042 #undef __FUNCT__
3043 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3044 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3045 {
3046   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3047   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3048   PetscErrorCode ierr;
3049 
3050   PetscFunctionBegin;
3051   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3052 
3053   /* allocate space for values if not already there */
3054   if (!aij->saved_values) {
3055     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3056     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3057   }
3058 
3059   /* copy values over */
3060   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3061   PetscFunctionReturn(0);
3062 }
3063 EXTERN_C_END
3064 
3065 EXTERN_C_BEGIN
3066 #undef __FUNCT__
3067 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3068 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3069 {
3070   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3071   PetscErrorCode ierr;
3072   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3073 
3074   PetscFunctionBegin;
3075   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3076   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3077 
3078   /* copy values over */
3079   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3080   PetscFunctionReturn(0);
3081 }
3082 EXTERN_C_END
3083 
3084 EXTERN_C_BEGIN
3085 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3086 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3087 EXTERN_C_END
3088 
3089 EXTERN_C_BEGIN
3090 #undef __FUNCT__
3091 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3092 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3093 {
3094   Mat_SeqBAIJ    *b;
3095   PetscErrorCode ierr;
3096   PetscInt       i,mbs,nbs,bs2;
3097   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3098 
3099   PetscFunctionBegin;
3100   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3101   if (nz == MAT_SKIP_ALLOCATION) {
3102     skipallocation = PETSC_TRUE;
3103     nz             = 0;
3104   }
3105 
3106   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3107   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3108   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3109   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3110   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3111 
3112   B->preallocated = PETSC_TRUE;
3113 
3114   mbs  = B->rmap->n/bs;
3115   nbs  = B->cmap->n/bs;
3116   bs2  = bs*bs;
3117 
3118   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3119 
3120   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3121   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3122   if (nnz) {
3123     for (i=0; i<mbs; i++) {
3124       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3125       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3126     }
3127   }
3128 
3129   b       = (Mat_SeqBAIJ*)B->data;
3130   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3131     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3132   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3133 
3134   if (!flg) {
3135     switch (bs) {
3136     case 1:
3137       B->ops->mult            = MatMult_SeqBAIJ_1;
3138       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3139       B->ops->sor             = MatSOR_SeqBAIJ_1;
3140       break;
3141     case 2:
3142       B->ops->mult            = MatMult_SeqBAIJ_2;
3143       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3144       B->ops->sor             = MatSOR_SeqBAIJ_2;
3145       break;
3146     case 3:
3147       B->ops->mult            = MatMult_SeqBAIJ_3;
3148       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3149       B->ops->sor             = MatSOR_SeqBAIJ_3;
3150       break;
3151     case 4:
3152       B->ops->mult            = MatMult_SeqBAIJ_4;
3153       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3154       B->ops->sor             = MatSOR_SeqBAIJ_4;
3155       break;
3156     case 5:
3157       B->ops->mult            = MatMult_SeqBAIJ_5;
3158       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3159       B->ops->sor             = MatSOR_SeqBAIJ_5;
3160       break;
3161     case 6:
3162       B->ops->mult            = MatMult_SeqBAIJ_6;
3163       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3164       B->ops->sor             = MatSOR_SeqBAIJ_6;
3165       break;
3166     case 7:
3167       B->ops->mult            = MatMult_SeqBAIJ_7;
3168       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3169       B->ops->sor             = MatSOR_SeqBAIJ_7;
3170       break;
3171     case 15:
3172       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3173       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3174       B->ops->sor             = MatSOR_SeqBAIJ_N;
3175       break;
3176     default:
3177       B->ops->mult            = MatMult_SeqBAIJ_N;
3178       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3179       B->ops->sor             = MatSOR_SeqBAIJ_N;
3180       break;
3181     }
3182   }
3183   b->mbs       = mbs;
3184   b->nbs       = nbs;
3185   if (!skipallocation) {
3186     if (!b->imax) {
3187       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3188       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3189       b->free_imax_ilen = PETSC_TRUE;
3190     }
3191     /* b->ilen will count nonzeros in each block row so far. */
3192     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3193     if (!nnz) {
3194       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3195       else if (nz < 0) nz = 1;
3196       for (i=0; i<mbs; i++) b->imax[i] = nz;
3197       nz = nz*mbs;
3198     } else {
3199       nz = 0;
3200       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3201     }
3202 
3203     /* allocate the matrix space */
3204     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3205     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3206     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3207     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3208     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3209     b->singlemalloc = PETSC_TRUE;
3210     b->i[0] = 0;
3211     for (i=1; i<mbs+1; i++) {
3212       b->i[i] = b->i[i-1] + b->imax[i-1];
3213     }
3214     b->free_a     = PETSC_TRUE;
3215     b->free_ij    = PETSC_TRUE;
3216   } else {
3217     b->free_a     = PETSC_FALSE;
3218     b->free_ij    = PETSC_FALSE;
3219   }
3220 
3221   b->bs2              = bs2;
3222   b->mbs              = mbs;
3223   b->nz               = 0;
3224   b->maxnz            = nz;
3225   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3226   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3227   PetscFunctionReturn(0);
3228 }
3229 EXTERN_C_END
3230 
3231 EXTERN_C_BEGIN
3232 #undef __FUNCT__
3233 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3234 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3235 {
3236   PetscInt       i,m,nz,nz_max=0,*nnz;
3237   PetscScalar    *values=0;
3238   PetscErrorCode ierr;
3239 
3240   PetscFunctionBegin;
3241   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3242   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3244   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3245   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3246   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3247   m = B->rmap->n/bs;
3248 
3249   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3250   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3251   for (i=0; i<m; i++) {
3252     nz = ii[i+1]- ii[i];
3253     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3254     nz_max = PetscMax(nz_max, nz);
3255     nnz[i] = nz;
3256   }
3257   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3258   ierr = PetscFree(nnz);CHKERRQ(ierr);
3259 
3260   values = (PetscScalar*)V;
3261   if (!values) {
3262     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3263     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3264   }
3265   for (i=0; i<m; i++) {
3266     PetscInt          ncols  = ii[i+1] - ii[i];
3267     const PetscInt    *icols = jj + ii[i];
3268     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3269     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3270   }
3271   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3272   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3273   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3274   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3275   PetscFunctionReturn(0);
3276 }
3277 EXTERN_C_END
3278 
3279 
3280 EXTERN_C_BEGIN
3281 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3282 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3283 #if defined(PETSC_HAVE_MUMPS)
3284 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3285 #endif
3286 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3287 EXTERN_C_END
3288 
3289 /*MC
3290    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3291    block sparse compressed row format.
3292 
3293    Options Database Keys:
3294 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3295 
3296   Level: beginner
3297 
3298 .seealso: MatCreateSeqBAIJ()
3299 M*/
3300 
3301 EXTERN_C_BEGIN
3302 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3303 EXTERN_C_END
3304 
3305 EXTERN_C_BEGIN
3306 #undef __FUNCT__
3307 #define __FUNCT__ "MatCreate_SeqBAIJ"
3308 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3309 {
3310   PetscErrorCode ierr;
3311   PetscMPIInt    size;
3312   Mat_SeqBAIJ    *b;
3313 
3314   PetscFunctionBegin;
3315   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3316   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3317 
3318   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3319   B->data = (void*)b;
3320   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3321   b->row                   = 0;
3322   b->col                   = 0;
3323   b->icol                  = 0;
3324   b->reallocs              = 0;
3325   b->saved_values          = 0;
3326 
3327   b->roworiented           = PETSC_TRUE;
3328   b->nonew                 = 0;
3329   b->diag                  = 0;
3330   b->solve_work            = 0;
3331   b->mult_work             = 0;
3332   B->spptr                 = 0;
3333   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3334   b->keepnonzeropattern    = PETSC_FALSE;
3335   b->xtoy                  = 0;
3336   b->XtoY                  = 0;
3337   B->same_nonzero          = PETSC_FALSE;
3338 
3339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3340                                      "MatGetFactorAvailable_seqbaij_petsc",
3341                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3343                                      "MatGetFactor_seqbaij_petsc",
3344                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3346                                      "MatGetFactor_seqbaij_bstrm",
3347                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3348 #if defined(PETSC_HAVE_MUMPS)
3349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3350 #endif
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3352                                      "MatInvertBlockDiagonal_SeqBAIJ",
3353                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3355                                      "MatStoreValues_SeqBAIJ",
3356                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3357   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3358                                      "MatRetrieveValues_SeqBAIJ",
3359                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3361                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3362                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3364                                      "MatConvert_SeqBAIJ_SeqAIJ",
3365                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3367                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3368                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3370                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3371                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3373                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3374                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3376                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3377                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3379                                      "MatIsTranspose_SeqBAIJ",
3380                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3381   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3382   PetscFunctionReturn(0);
3383 }
3384 EXTERN_C_END
3385 
3386 #undef __FUNCT__
3387 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3388 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3389 {
3390   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3391   PetscErrorCode ierr;
3392   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3393 
3394   PetscFunctionBegin;
3395   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3396 
3397   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3398     c->imax = a->imax;
3399     c->ilen = a->ilen;
3400     c->free_imax_ilen = PETSC_FALSE;
3401   } else {
3402     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3403     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3404     for (i=0; i<mbs; i++) {
3405       c->imax[i] = a->imax[i];
3406       c->ilen[i] = a->ilen[i];
3407     }
3408     c->free_imax_ilen = PETSC_TRUE;
3409   }
3410 
3411   /* allocate the matrix space */
3412   if (mallocmatspace){
3413     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3414       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3415       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3416       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3417       c->i            = a->i;
3418       c->j            = a->j;
3419       c->singlemalloc = PETSC_FALSE;
3420       c->free_a       = PETSC_TRUE;
3421       c->free_ij      = PETSC_FALSE;
3422       c->parent       = A;
3423       C->preallocated = PETSC_TRUE;
3424       C->assembled    = PETSC_TRUE;
3425       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3426       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3427       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3428     } else {
3429       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3430       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3431       c->singlemalloc = PETSC_TRUE;
3432       c->free_a       = PETSC_TRUE;
3433       c->free_ij      = PETSC_TRUE;
3434       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3435       if (mbs > 0) {
3436 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3437 	if (cpvalues == MAT_COPY_VALUES) {
3438 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3439 	} else {
3440 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3441 	}
3442       }
3443       C->preallocated = PETSC_TRUE;
3444       C->assembled    = PETSC_TRUE;
3445     }
3446   }
3447 
3448   c->roworiented = a->roworiented;
3449   c->nonew       = a->nonew;
3450   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3451   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3452   c->bs2         = a->bs2;
3453   c->mbs         = a->mbs;
3454   c->nbs         = a->nbs;
3455 
3456   if (a->diag) {
3457     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3458       c->diag      = a->diag;
3459       c->free_diag = PETSC_FALSE;
3460     } else {
3461       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3462       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3463       for (i=0; i<mbs; i++) {
3464         c->diag[i] = a->diag[i];
3465       }
3466       c->free_diag = PETSC_TRUE;
3467     }
3468   } else c->diag        = 0;
3469   c->nz                 = a->nz;
3470   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3471   c->solve_work         = 0;
3472   c->mult_work          = 0;
3473 
3474   c->compressedrow.use     = a->compressedrow.use;
3475   c->compressedrow.nrows   = a->compressedrow.nrows;
3476   c->compressedrow.check   = a->compressedrow.check;
3477   if (a->compressedrow.use){
3478     i = a->compressedrow.nrows;
3479     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3480     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3481     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3482     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3483   } else {
3484     c->compressedrow.use    = PETSC_FALSE;
3485     c->compressedrow.i      = PETSC_NULL;
3486     c->compressedrow.rindex = PETSC_NULL;
3487   }
3488   C->same_nonzero = A->same_nonzero;
3489   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3490   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3496 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3497 {
3498     PetscErrorCode ierr;
3499 
3500   PetscFunctionBegin;
3501   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3502   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3503   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3504   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatLoad_SeqBAIJ"
3510 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3511 {
3512   Mat_SeqBAIJ    *a;
3513   PetscErrorCode ierr;
3514   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3515   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3516   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3517   PetscInt       *masked,nmask,tmp,bs2,ishift;
3518   PetscMPIInt    size;
3519   int            fd;
3520   PetscScalar    *aa;
3521   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3522 
3523   PetscFunctionBegin;
3524   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3525   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3526   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3527   bs2  = bs*bs;
3528 
3529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3531   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3532   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3533   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3534   M = header[1]; N = header[2]; nz = header[3];
3535 
3536   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3537   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3538 
3539   /*
3540      This code adds extra rows to make sure the number of rows is
3541     divisible by the blocksize
3542   */
3543   mbs        = M/bs;
3544   extra_rows = bs - M + bs*(mbs);
3545   if (extra_rows == bs) extra_rows = 0;
3546   else                  mbs++;
3547   if (extra_rows) {
3548     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3549   }
3550 
3551   /* Set global sizes if not already set */
3552   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3553     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3554   } else { /* Check if the matrix global sizes are correct */
3555     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3556     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3557       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3558     }
3559     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3560   }
3561 
3562   /* read in row lengths */
3563   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3564   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3565   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3566 
3567   /* read in column indices */
3568   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3569   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3570   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3571 
3572   /* loop over row lengths determining block row lengths */
3573   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3574   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3575   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3576   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3577   rowcount = 0;
3578   nzcount = 0;
3579   for (i=0; i<mbs; i++) {
3580     nmask = 0;
3581     for (j=0; j<bs; j++) {
3582       kmax = rowlengths[rowcount];
3583       for (k=0; k<kmax; k++) {
3584         tmp = jj[nzcount++]/bs;
3585         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3586       }
3587       rowcount++;
3588     }
3589     browlengths[i] += nmask;
3590     /* zero out the mask elements we set */
3591     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3592   }
3593 
3594   /* Do preallocation  */
3595   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3596   a = (Mat_SeqBAIJ*)newmat->data;
3597 
3598   /* set matrix "i" values */
3599   a->i[0] = 0;
3600   for (i=1; i<= mbs; i++) {
3601     a->i[i]      = a->i[i-1] + browlengths[i-1];
3602     a->ilen[i-1] = browlengths[i-1];
3603   }
3604   a->nz         = 0;
3605   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3606 
3607   /* read in nonzero values */
3608   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3609   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3610   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3611 
3612   /* set "a" and "j" values into matrix */
3613   nzcount = 0; jcount = 0;
3614   for (i=0; i<mbs; i++) {
3615     nzcountb = nzcount;
3616     nmask    = 0;
3617     for (j=0; j<bs; j++) {
3618       kmax = rowlengths[i*bs+j];
3619       for (k=0; k<kmax; k++) {
3620         tmp = jj[nzcount++]/bs;
3621 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3622       }
3623     }
3624     /* sort the masked values */
3625     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3626 
3627     /* set "j" values into matrix */
3628     maskcount = 1;
3629     for (j=0; j<nmask; j++) {
3630       a->j[jcount++]  = masked[j];
3631       mask[masked[j]] = maskcount++;
3632     }
3633     /* set "a" values into matrix */
3634     ishift = bs2*a->i[i];
3635     for (j=0; j<bs; j++) {
3636       kmax = rowlengths[i*bs+j];
3637       for (k=0; k<kmax; k++) {
3638         tmp       = jj[nzcountb]/bs ;
3639         block     = mask[tmp] - 1;
3640         point     = jj[nzcountb] - bs*tmp;
3641         idx       = ishift + bs2*block + j + bs*point;
3642         a->a[idx] = (MatScalar)aa[nzcountb++];
3643       }
3644     }
3645     /* zero out the mask elements we set */
3646     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3647   }
3648   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3649 
3650   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3651   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3652   ierr = PetscFree(aa);CHKERRQ(ierr);
3653   ierr = PetscFree(jj);CHKERRQ(ierr);
3654   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3655 
3656   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3659   PetscFunctionReturn(0);
3660 }
3661 
3662 #undef __FUNCT__
3663 #define __FUNCT__ "MatCreateSeqBAIJ"
3664 /*@C
3665    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3666    compressed row) format.  For good matrix assembly performance the
3667    user should preallocate the matrix storage by setting the parameter nz
3668    (or the array nnz).  By setting these parameters accurately, performance
3669    during matrix assembly can be increased by more than a factor of 50.
3670 
3671    Collective on MPI_Comm
3672 
3673    Input Parameters:
3674 +  comm - MPI communicator, set to PETSC_COMM_SELF
3675 .  bs - size of block
3676 .  m - number of rows
3677 .  n - number of columns
3678 .  nz - number of nonzero blocks  per block row (same for all rows)
3679 -  nnz - array containing the number of nonzero blocks in the various block rows
3680          (possibly different for each block row) or PETSC_NULL
3681 
3682    Output Parameter:
3683 .  A - the matrix
3684 
3685    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3686    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3687    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3688 
3689    Options Database Keys:
3690 .   -mat_no_unroll - uses code that does not unroll the loops in the
3691                      block calculations (much slower)
3692 .    -mat_block_size - size of the blocks to use
3693 
3694    Level: intermediate
3695 
3696    Notes:
3697    The number of rows and columns must be divisible by blocksize.
3698 
3699    If the nnz parameter is given then the nz parameter is ignored
3700 
3701    A nonzero block is any block that as 1 or more nonzeros in it
3702 
3703    The block AIJ format is fully compatible with standard Fortran 77
3704    storage.  That is, the stored row and column indices can begin at
3705    either one (as in Fortran) or zero.  See the users' manual for details.
3706 
3707    Specify the preallocated storage with either nz or nnz (not both).
3708    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3709    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3710    matrices.
3711 
3712 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3713 @*/
3714 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3715 {
3716   PetscErrorCode ierr;
3717 
3718   PetscFunctionBegin;
3719   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3720   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3721   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3722   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3723   PetscFunctionReturn(0);
3724 }
3725 
3726 #undef __FUNCT__
3727 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3728 /*@C
3729    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3730    per row in the matrix. For good matrix assembly performance the
3731    user should preallocate the matrix storage by setting the parameter nz
3732    (or the array nnz).  By setting these parameters accurately, performance
3733    during matrix assembly can be increased by more than a factor of 50.
3734 
3735    Collective on MPI_Comm
3736 
3737    Input Parameters:
3738 +  A - the matrix
3739 .  bs - size of block
3740 .  nz - number of block nonzeros per block row (same for all rows)
3741 -  nnz - array containing the number of block nonzeros in the various block rows
3742          (possibly different for each block row) or PETSC_NULL
3743 
3744    Options Database Keys:
3745 .   -mat_no_unroll - uses code that does not unroll the loops in the
3746                      block calculations (much slower)
3747 .    -mat_block_size - size of the blocks to use
3748 
3749    Level: intermediate
3750 
3751    Notes:
3752    If the nnz parameter is given then the nz parameter is ignored
3753 
3754    You can call MatGetInfo() to get information on how effective the preallocation was;
3755    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3756    You can also run with the option -info and look for messages with the string
3757    malloc in them to see if additional memory allocation was needed.
3758 
3759    The block AIJ format is fully compatible with standard Fortran 77
3760    storage.  That is, the stored row and column indices can begin at
3761    either one (as in Fortran) or zero.  See the users' manual for details.
3762 
3763    Specify the preallocated storage with either nz or nnz (not both).
3764    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3765    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3766 
3767 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3768 @*/
3769 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3770 {
3771   PetscErrorCode ierr;
3772 
3773   PetscFunctionBegin;
3774   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3775   PetscValidType(B,1);
3776   PetscValidLogicalCollectiveInt(B,bs,2);
3777   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3778   PetscFunctionReturn(0);
3779 }
3780 
3781 #undef __FUNCT__
3782 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3783 /*@C
3784    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3785    (the default sequential PETSc format).
3786 
3787    Collective on MPI_Comm
3788 
3789    Input Parameters:
3790 +  A - the matrix
3791 .  i - the indices into j for the start of each local row (starts with zero)
3792 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3793 -  v - optional values in the matrix
3794 
3795    Level: developer
3796 
3797 .keywords: matrix, aij, compressed row, sparse
3798 
3799 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3800 @*/
3801 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3802 {
3803   PetscErrorCode ierr;
3804 
3805   PetscFunctionBegin;
3806   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3807   PetscValidType(B,1);
3808   PetscValidLogicalCollectiveInt(B,bs,2);
3809   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3810   PetscFunctionReturn(0);
3811 }
3812 
3813 
3814 #undef __FUNCT__
3815 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3816 /*@
3817      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3818 
3819      Collective on MPI_Comm
3820 
3821    Input Parameters:
3822 +  comm - must be an MPI communicator of size 1
3823 .  bs - size of block
3824 .  m - number of rows
3825 .  n - number of columns
3826 .  i - row indices
3827 .  j - column indices
3828 -  a - matrix values
3829 
3830    Output Parameter:
3831 .  mat - the matrix
3832 
3833    Level: advanced
3834 
3835    Notes:
3836        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3837     once the matrix is destroyed
3838 
3839        You cannot set new nonzero locations into this matrix, that will generate an error.
3840 
3841        The i and j indices are 0 based
3842 
3843        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3844 
3845 
3846 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3847 
3848 @*/
3849 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3850 {
3851   PetscErrorCode ierr;
3852   PetscInt       ii;
3853   Mat_SeqBAIJ    *baij;
3854 
3855   PetscFunctionBegin;
3856   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3857   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3858 
3859   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3860   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3861   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3862   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3863   baij = (Mat_SeqBAIJ*)(*mat)->data;
3864   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3865   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3866 
3867   baij->i = i;
3868   baij->j = j;
3869   baij->a = a;
3870   baij->singlemalloc = PETSC_FALSE;
3871   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3872   baij->free_a       = PETSC_FALSE;
3873   baij->free_ij      = PETSC_FALSE;
3874 
3875   for (ii=0; ii<m; ii++) {
3876     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3877 #if defined(PETSC_USE_DEBUG)
3878     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3879 #endif
3880   }
3881 #if defined(PETSC_USE_DEBUG)
3882   for (ii=0; ii<baij->i[m]; ii++) {
3883     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3884     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3885   }
3886 #endif
3887 
3888   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3889   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3890   PetscFunctionReturn(0);
3891 }
3892