xref: /petsc/src/mat/impls/baij/seq/baij.c (revision bcaeba4d41d6ca6c6dc4189db20683073a9959ce)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1305 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1310   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1311 
1312   PetscFunctionBegin;
1313   *nn = n;
1314   if (!ia) PetscFunctionReturn(0);
1315   if (symmetric) {
1316     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1317     nz   = tia[n];
1318   } else {
1319     tia = a->i; tja = a->j;
1320   }
1321 
1322   if (!blockcompressed && bs > 1) {
1323     (*nn) *= bs;
1324     /* malloc & create the natural set of indices */
1325     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1326     if (n) {
1327       (*ia)[0] = 0;
1328       for (j=1; j<bs; j++) {
1329         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1330       }
1331     }
1332 
1333     for (i=1; i<n; i++) {
1334       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1335       for (j=1; j<bs; j++) {
1336         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1337       }
1338     }
1339     if (n) {
1340       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1341     }
1342 
1343     if (inja) {
1344       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1345       cnt = 0;
1346       for (i=0; i<n; i++) {
1347         for (j=0; j<bs; j++) {
1348           for (k=tia[i]; k<tia[i+1]; k++) {
1349             for (l=0; l<bs; l++) {
1350               (*ja)[cnt++] = bs*tja[k] + l;
1351 	    }
1352           }
1353         }
1354       }
1355     }
1356 
1357     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1358       ierr = PetscFree(tia);CHKERRQ(ierr);
1359       ierr = PetscFree(tja);CHKERRQ(ierr);
1360     }
1361   } else if (oshift == 1) {
1362     if (symmetric) {
1363       nz = tia[A->rmap->n/bs];
1364       /*  add 1 to i and j indices */
1365       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1366       *ia = tia;
1367       if (ja) {
1368 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1369         *ja = tja;
1370       }
1371     } else {
1372       nz = a->i[A->rmap->n/bs];
1373       /* malloc space and  add 1 to i and j indices */
1374       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1375       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1376       if (ja) {
1377 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1378 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1379       }
1380     }
1381   } else {
1382     *ia = tia;
1383     if (ja) *ja = tja;
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1390 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1391 {
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   if (!ia) PetscFunctionReturn(0);
1396   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1397     ierr = PetscFree(*ia);CHKERRQ(ierr);
1398     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1405 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1406 {
1407   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1408   PetscErrorCode ierr;
1409 
1410   PetscFunctionBegin;
1411 #if defined(PETSC_USE_LOG)
1412   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1413 #endif
1414   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1415   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1416   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1417   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1418   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1419   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1420   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1421   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1422   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1423   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1424   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1425   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1426   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1427 
1428   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1429   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1430   ierr = PetscFree(A->data);CHKERRQ(ierr);
1431 
1432   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1433   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1448 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1449 {
1450   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1451   PetscErrorCode ierr;
1452 
1453   PetscFunctionBegin;
1454   switch (op) {
1455   case MAT_ROW_ORIENTED:
1456     a->roworiented    = flg;
1457     break;
1458   case MAT_KEEP_NONZERO_PATTERN:
1459     a->keepnonzeropattern = flg;
1460     break;
1461   case MAT_NEW_NONZERO_LOCATIONS:
1462     a->nonew          = (flg ? 0 : 1);
1463     break;
1464   case MAT_NEW_NONZERO_LOCATION_ERR:
1465     a->nonew          = (flg ? -1 : 0);
1466     break;
1467   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1468     a->nonew          = (flg ? -2 : 0);
1469     break;
1470   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1471     a->nounused       = (flg ? -1 : 0);
1472     break;
1473   case MAT_CHECK_COMPRESSED_ROW:
1474     a->compressedrow.check = flg;
1475     break;
1476   case MAT_NEW_DIAGONALS:
1477   case MAT_IGNORE_OFF_PROC_ENTRIES:
1478   case MAT_USE_HASH_TABLE:
1479     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1480     break;
1481   case MAT_SPD:
1482   case MAT_SYMMETRIC:
1483   case MAT_STRUCTURALLY_SYMMETRIC:
1484   case MAT_HERMITIAN:
1485   case MAT_SYMMETRY_ETERNAL:
1486     /* These options are handled directly by MatSetOption() */
1487     break;
1488   default:
1489     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1496 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1497 {
1498   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1499   PetscErrorCode ierr;
1500   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1501   MatScalar      *aa,*aa_i;
1502   PetscScalar    *v_i;
1503 
1504   PetscFunctionBegin;
1505   bs  = A->rmap->bs;
1506   ai  = a->i;
1507   aj  = a->j;
1508   aa  = a->a;
1509   bs2 = a->bs2;
1510 
1511   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1512 
1513   bn  = row/bs;   /* Block number */
1514   bp  = row % bs; /* Block Position */
1515   M   = ai[bn+1] - ai[bn];
1516   *nz = bs*M;
1517 
1518   if (v) {
1519     *v = 0;
1520     if (*nz) {
1521       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1522       for (i=0; i<M; i++) { /* for each block in the block row */
1523         v_i  = *v + i*bs;
1524         aa_i = aa + bs2*(ai[bn] + i);
1525         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1526       }
1527     }
1528   }
1529 
1530   if (idx) {
1531     *idx = 0;
1532     if (*nz) {
1533       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1534       for (i=0; i<M; i++) { /* for each block in the block row */
1535         idx_i = *idx + i*bs;
1536         itmp  = bs*aj[ai[bn] + i];
1537         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1538       }
1539     }
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1546 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1547 {
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1552   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1560 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1561 {
1562   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1563   Mat            C;
1564   PetscErrorCode ierr;
1565   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1566   PetscInt       *rows,*cols,bs2=a->bs2;
1567   MatScalar      *array;
1568 
1569   PetscFunctionBegin;
1570   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1571   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1572     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1573     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1574 
1575     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1576     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1577     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1578     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1579     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1580     ierr = PetscFree(col);CHKERRQ(ierr);
1581   } else {
1582     C = *B;
1583   }
1584 
1585   array = a->a;
1586   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1587   for (i=0; i<mbs; i++) {
1588     cols[0] = i*bs;
1589     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1590     len = ai[i+1] - ai[i];
1591     for (j=0; j<len; j++) {
1592       rows[0] = (*aj++)*bs;
1593       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1594       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1595       array += bs2;
1596     }
1597   }
1598   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1599 
1600   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602 
1603   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1604     *B = C;
1605   } else {
1606     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 EXTERN_C_BEGIN
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1614 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1615 {
1616   PetscErrorCode ierr;
1617   Mat            Btrans;
1618 
1619   PetscFunctionBegin;
1620   *f = PETSC_FALSE;
1621   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1622   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1623   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1624   PetscFunctionReturn(0);
1625 }
1626 EXTERN_C_END
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1630 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1631 {
1632   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1633   PetscErrorCode ierr;
1634   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1635   int            fd;
1636   PetscScalar    *aa;
1637   FILE           *file;
1638 
1639   PetscFunctionBegin;
1640   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1641   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1642   col_lens[0] = MAT_FILE_CLASSID;
1643 
1644   col_lens[1] = A->rmap->N;
1645   col_lens[2] = A->cmap->n;
1646   col_lens[3] = a->nz*bs2;
1647 
1648   /* store lengths of each row and write (including header) to file */
1649   count = 0;
1650   for (i=0; i<a->mbs; i++) {
1651     for (j=0; j<bs; j++) {
1652       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1653     }
1654   }
1655   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1656   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1657 
1658   /* store column indices (zero start index) */
1659   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1660   count = 0;
1661   for (i=0; i<a->mbs; i++) {
1662     for (j=0; j<bs; j++) {
1663       for (k=a->i[i]; k<a->i[i+1]; k++) {
1664         for (l=0; l<bs; l++) {
1665           jj[count++] = bs*a->j[k] + l;
1666         }
1667       }
1668     }
1669   }
1670   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1671   ierr = PetscFree(jj);CHKERRQ(ierr);
1672 
1673   /* store nonzero values */
1674   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1675   count = 0;
1676   for (i=0; i<a->mbs; i++) {
1677     for (j=0; j<bs; j++) {
1678       for (k=a->i[i]; k<a->i[i+1]; k++) {
1679         for (l=0; l<bs; l++) {
1680           aa[count++] = a->a[bs2*k + l*bs + j];
1681         }
1682       }
1683     }
1684   }
1685   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1686   ierr = PetscFree(aa);CHKERRQ(ierr);
1687 
1688   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1689   if (file) {
1690     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1697 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1698 {
1699   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1700   PetscErrorCode    ierr;
1701   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1702   PetscViewerFormat format;
1703 
1704   PetscFunctionBegin;
1705   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1706   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1707     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1708   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1709     Mat aij;
1710     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1711     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1712     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1713   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1714      PetscFunctionReturn(0);
1715   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1716     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1717     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1718     for (i=0; i<a->mbs; i++) {
1719       for (j=0; j<bs; j++) {
1720         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1721         for (k=a->i[i]; k<a->i[i+1]; k++) {
1722           for (l=0; l<bs; l++) {
1723 #if defined(PETSC_USE_COMPLEX)
1724             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1725               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1726                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1727             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1728               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1729                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1730             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1731               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             }
1733 #else
1734             if (a->a[bs2*k + l*bs + j] != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1736             }
1737 #endif
1738           }
1739         }
1740         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1741       }
1742     }
1743     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1744   } else {
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1746     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1747     for (i=0; i<a->mbs; i++) {
1748       for (j=0; j<bs; j++) {
1749         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1750         for (k=a->i[i]; k<a->i[i+1]; k++) {
1751           for (l=0; l<bs; l++) {
1752 #if defined(PETSC_USE_COMPLEX)
1753             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1754               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1755                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1756             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1757               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1758                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1759             } else {
1760               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             }
1762 #else
1763             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1764 #endif
1765           }
1766         }
1767         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1768       }
1769     }
1770     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1771   }
1772   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1778 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1779 {
1780   Mat            A = (Mat) Aa;
1781   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1782   PetscErrorCode ierr;
1783   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1784   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1785   MatScalar      *aa;
1786   PetscViewer    viewer;
1787   PetscViewerFormat format;
1788 
1789   PetscFunctionBegin;
1790   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1791   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1792 
1793   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1794 
1795   /* loop over matrix elements drawing boxes */
1796 
1797   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1798     color = PETSC_DRAW_BLUE;
1799     for (i=0,row=0; i<mbs; i++,row+=bs) {
1800       for (j=a->i[i]; j<a->i[i+1]; j++) {
1801         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1802         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1803         aa = a->a + j*bs2;
1804         for (k=0; k<bs; k++) {
1805           for (l=0; l<bs; l++) {
1806             if (PetscRealPart(*aa++) >=  0.) continue;
1807             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1808           }
1809         }
1810       }
1811     }
1812     color = PETSC_DRAW_CYAN;
1813     for (i=0,row=0; i<mbs; i++,row+=bs) {
1814       for (j=a->i[i]; j<a->i[i+1]; j++) {
1815         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1816         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1817         aa = a->a + j*bs2;
1818         for (k=0; k<bs; k++) {
1819           for (l=0; l<bs; l++) {
1820             if (PetscRealPart(*aa++) != 0.) continue;
1821             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1822           }
1823         }
1824       }
1825     }
1826     color = PETSC_DRAW_RED;
1827     for (i=0,row=0; i<mbs; i++,row+=bs) {
1828       for (j=a->i[i]; j<a->i[i+1]; j++) {
1829         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1830         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1831         aa = a->a + j*bs2;
1832         for (k=0; k<bs; k++) {
1833           for (l=0; l<bs; l++) {
1834             if (PetscRealPart(*aa++) <= 0.) continue;
1835             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1836           }
1837         }
1838       }
1839     }
1840   } else {
1841     /* use contour shading to indicate magnitude of values */
1842     /* first determine max of all nonzero values */
1843     PetscDraw   popup;
1844     PetscReal scale,maxv = 0.0;
1845 
1846     for (i=0; i<a->nz*a->bs2; i++) {
1847       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1848     }
1849     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1850     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1851     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1852     for (i=0,row=0; i<mbs; i++,row+=bs) {
1853       for (j=a->i[i]; j<a->i[i+1]; j++) {
1854         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1855         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1856         aa = a->a + j*bs2;
1857         for (k=0; k<bs; k++) {
1858           for (l=0; l<bs; l++) {
1859             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1860             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1861           }
1862         }
1863       }
1864     }
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1871 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1872 {
1873   PetscErrorCode ierr;
1874   PetscReal      xl,yl,xr,yr,w,h;
1875   PetscDraw      draw;
1876   PetscBool      isnull;
1877 
1878   PetscFunctionBegin;
1879 
1880   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1881   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1882 
1883   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1884   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1885   xr += w;    yr += h;  xl = -w;     yl = -h;
1886   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1887   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1888   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatView_SeqBAIJ"
1894 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1895 {
1896   PetscErrorCode ierr;
1897   PetscBool      iascii,isbinary,isdraw;
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1901   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1902   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1903   if (iascii){
1904     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1905   } else if (isbinary) {
1906     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1907   } else if (isdraw) {
1908     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1909   } else {
1910     Mat B;
1911     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1912     ierr = MatView(B,viewer);CHKERRQ(ierr);
1913     ierr = MatDestroy(&B);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1921 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1922 {
1923   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1924   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1925   PetscInt    *ai = a->i,*ailen = a->ilen;
1926   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1927   MatScalar   *ap,*aa = a->a;
1928 
1929   PetscFunctionBegin;
1930   for (k=0; k<m; k++) { /* loop over rows */
1931     row  = im[k]; brow = row/bs;
1932     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1933     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1934     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1935     nrow = ailen[brow];
1936     for (l=0; l<n; l++) { /* loop over columns */
1937       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1938       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1939       col  = in[l] ;
1940       bcol = col/bs;
1941       cidx = col%bs;
1942       ridx = row%bs;
1943       high = nrow;
1944       low  = 0; /* assume unsorted */
1945       while (high-low > 5) {
1946         t = (low+high)/2;
1947         if (rp[t] > bcol) high = t;
1948         else             low  = t;
1949       }
1950       for (i=low; i<high; i++) {
1951         if (rp[i] > bcol) break;
1952         if (rp[i] == bcol) {
1953           *v++ = ap[bs2*i+bs*cidx+ridx];
1954           goto finished;
1955         }
1956       }
1957       *v++ = 0.0;
1958       finished:;
1959     }
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1966 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1967 {
1968   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1969   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1970   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1971   PetscErrorCode    ierr;
1972   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1973   PetscBool         roworiented=a->roworiented;
1974   const PetscScalar *value = v;
1975   MatScalar         *ap,*aa = a->a,*bap;
1976 
1977   PetscFunctionBegin;
1978   if (roworiented) {
1979     stepval = (n-1)*bs;
1980   } else {
1981     stepval = (m-1)*bs;
1982   }
1983   for (k=0; k<m; k++) { /* loop over added rows */
1984     row  = im[k];
1985     if (row < 0) continue;
1986 #if defined(PETSC_USE_DEBUG)
1987     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1988 #endif
1989     rp   = aj + ai[row];
1990     ap   = aa + bs2*ai[row];
1991     rmax = imax[row];
1992     nrow = ailen[row];
1993     low  = 0;
1994     high = nrow;
1995     for (l=0; l<n; l++) { /* loop over added columns */
1996       if (in[l] < 0) continue;
1997 #if defined(PETSC_USE_DEBUG)
1998       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1999 #endif
2000       col = in[l];
2001       if (roworiented) {
2002         value = v + (k*(stepval+bs) + l)*bs;
2003       } else {
2004         value = v + (l*(stepval+bs) + k)*bs;
2005       }
2006       if (col <= lastcol) low = 0; else high = nrow;
2007       lastcol = col;
2008       while (high-low > 7) {
2009         t = (low+high)/2;
2010         if (rp[t] > col) high = t;
2011         else             low  = t;
2012       }
2013       for (i=low; i<high; i++) {
2014         if (rp[i] > col) break;
2015         if (rp[i] == col) {
2016           bap  = ap +  bs2*i;
2017           if (roworiented) {
2018             if (is == ADD_VALUES) {
2019               for (ii=0; ii<bs; ii++,value+=stepval) {
2020                 for (jj=ii; jj<bs2; jj+=bs) {
2021                   bap[jj] += *value++;
2022                 }
2023               }
2024             } else {
2025               for (ii=0; ii<bs; ii++,value+=stepval) {
2026                 for (jj=ii; jj<bs2; jj+=bs) {
2027                   bap[jj] = *value++;
2028                 }
2029               }
2030             }
2031           } else {
2032             if (is == ADD_VALUES) {
2033               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2034                 for (jj=0; jj<bs; jj++) {
2035                   bap[jj] += value[jj];
2036                 }
2037                 bap += bs;
2038               }
2039             } else {
2040               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2041                 for (jj=0; jj<bs; jj++) {
2042                   bap[jj]  = value[jj];
2043                 }
2044                 bap += bs;
2045               }
2046             }
2047           }
2048           goto noinsert2;
2049         }
2050       }
2051       if (nonew == 1) goto noinsert2;
2052       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2053       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2054       N = nrow++ - 1; high++;
2055       /* shift up all the later entries in this row */
2056       for (ii=N; ii>=i; ii--) {
2057         rp[ii+1] = rp[ii];
2058         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2059       }
2060       if (N >= i) {
2061         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2062       }
2063       rp[i] = col;
2064       bap   = ap +  bs2*i;
2065       if (roworiented) {
2066         for (ii=0; ii<bs; ii++,value+=stepval) {
2067           for (jj=ii; jj<bs2; jj+=bs) {
2068             bap[jj] = *value++;
2069           }
2070         }
2071       } else {
2072         for (ii=0; ii<bs; ii++,value+=stepval) {
2073           for (jj=0; jj<bs; jj++) {
2074             *bap++  = *value++;
2075           }
2076         }
2077       }
2078       noinsert2:;
2079       low = i;
2080     }
2081     ailen[row] = nrow;
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2088 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2089 {
2090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2091   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2092   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2093   PetscErrorCode ierr;
2094   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2095   MatScalar      *aa = a->a,*ap;
2096   PetscReal      ratio=0.6;
2097 
2098   PetscFunctionBegin;
2099   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2100 
2101   if (m) rmax = ailen[0];
2102   for (i=1; i<mbs; i++) {
2103     /* move each row back by the amount of empty slots (fshift) before it*/
2104     fshift += imax[i-1] - ailen[i-1];
2105     rmax   = PetscMax(rmax,ailen[i]);
2106     if (fshift) {
2107       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2108       N = ailen[i];
2109       for (j=0; j<N; j++) {
2110         ip[j-fshift] = ip[j];
2111         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2112       }
2113     }
2114     ai[i] = ai[i-1] + ailen[i-1];
2115   }
2116   if (mbs) {
2117     fshift += imax[mbs-1] - ailen[mbs-1];
2118     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2119   }
2120   /* reset ilen and imax for each row */
2121   for (i=0; i<mbs; i++) {
2122     ailen[i] = imax[i] = ai[i+1] - ai[i];
2123   }
2124   a->nz = ai[mbs];
2125 
2126   /* diagonals may have moved, so kill the diagonal pointers */
2127   a->idiagvalid = PETSC_FALSE;
2128   if (fshift && a->diag) {
2129     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2130     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2131     a->diag = 0;
2132   }
2133   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2134   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2135   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2136   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2137   A->info.mallocs     += a->reallocs;
2138   a->reallocs          = 0;
2139   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2140 
2141   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2142   A->same_nonzero = PETSC_TRUE;
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 /*
2147    This function returns an array of flags which indicate the locations of contiguous
2148    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2149    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2150    Assume: sizes should be long enough to hold all the values.
2151 */
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2154 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2155 {
2156   PetscInt   i,j,k,row;
2157   PetscBool  flg;
2158 
2159   PetscFunctionBegin;
2160   for (i=0,j=0; i<n; j++) {
2161     row = idx[i];
2162     if (row%bs!=0) { /* Not the begining of a block */
2163       sizes[j] = 1;
2164       i++;
2165     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2166       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2167       i++;
2168     } else { /* Begining of the block, so check if the complete block exists */
2169       flg = PETSC_TRUE;
2170       for (k=1; k<bs; k++) {
2171         if (row+k != idx[i+k]) { /* break in the block */
2172           flg = PETSC_FALSE;
2173           break;
2174         }
2175       }
2176       if (flg) { /* No break in the bs */
2177         sizes[j] = bs;
2178         i+= bs;
2179       } else {
2180         sizes[j] = 1;
2181         i++;
2182       }
2183     }
2184   }
2185   *bs_max = j;
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2191 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2192 {
2193   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2194   PetscErrorCode    ierr;
2195   PetscInt          i,j,k,count,*rows;
2196   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2197   PetscScalar       zero = 0.0;
2198   MatScalar         *aa;
2199   const PetscScalar *xx;
2200   PetscScalar       *bb;
2201 
2202   PetscFunctionBegin;
2203   /* fix right hand side if needed */
2204   if (x && b) {
2205     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2206     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2207     for (i=0; i<is_n; i++) {
2208       bb[is_idx[i]] = diag*xx[is_idx[i]];
2209     }
2210     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2211     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2212   }
2213 
2214   /* Make a copy of the IS and  sort it */
2215   /* allocate memory for rows,sizes */
2216   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2217 
2218   /* copy IS values to rows, and sort them */
2219   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2220   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2221 
2222   if (baij->keepnonzeropattern) {
2223     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2224     bs_max = is_n;
2225     A->same_nonzero = PETSC_TRUE;
2226   } else {
2227     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2228     A->same_nonzero = PETSC_FALSE;
2229   }
2230 
2231   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2232     row   = rows[j];
2233     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2234     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2235     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2236     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2237       if (diag != (PetscScalar)0.0) {
2238         if (baij->ilen[row/bs] > 0) {
2239           baij->ilen[row/bs]       = 1;
2240           baij->j[baij->i[row/bs]] = row/bs;
2241           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2242         }
2243         /* Now insert all the diagonal values for this bs */
2244         for (k=0; k<bs; k++) {
2245           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2246         }
2247       } else { /* (diag == 0.0) */
2248         baij->ilen[row/bs] = 0;
2249       } /* end (diag == 0.0) */
2250     } else { /* (sizes[i] != bs) */
2251 #if defined (PETSC_USE_DEBUG)
2252       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2253 #endif
2254       for (k=0; k<count; k++) {
2255         aa[0] =  zero;
2256         aa    += bs;
2257       }
2258       if (diag != (PetscScalar)0.0) {
2259         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2260       }
2261     }
2262   }
2263 
2264   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2265   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2271 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2272 {
2273   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2274   PetscErrorCode    ierr;
2275   PetscInt          i,j,k,count;
2276   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2277   PetscScalar       zero = 0.0;
2278   MatScalar         *aa;
2279   const PetscScalar *xx;
2280   PetscScalar       *bb;
2281   PetscBool         *zeroed,vecs = PETSC_FALSE;
2282 
2283   PetscFunctionBegin;
2284   /* fix right hand side if needed */
2285   if (x && b) {
2286     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2287     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2288     vecs = PETSC_TRUE;
2289   }
2290   A->same_nonzero = PETSC_TRUE;
2291 
2292   /* zero the columns */
2293   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2294   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2295   for (i=0; i<is_n; i++) {
2296     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2297     zeroed[is_idx[i]] = PETSC_TRUE;
2298   }
2299   for (i=0; i<A->rmap->N; i++) {
2300     if (!zeroed[i]) {
2301       row = i/bs;
2302       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2303         for (k=0; k<bs; k++) {
2304           col = bs*baij->j[j] + k;
2305 	  if (zeroed[col]) {
2306 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2307             if (vecs) bb[i] -= aa[0]*xx[col];
2308             aa[0] = 0.0;
2309           }
2310         }
2311       }
2312     } else if (vecs) bb[i] = diag*xx[i];
2313   }
2314   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2315   if (vecs) {
2316     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2317     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2318   }
2319 
2320   /* zero the rows */
2321   for (i=0; i<is_n; i++) {
2322     row   = is_idx[i];
2323     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2324     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2325     for (k=0; k<count; k++) {
2326       aa[0] =  zero;
2327       aa    += bs;
2328     }
2329     if (diag != (PetscScalar)0.0) {
2330       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2331     }
2332   }
2333   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2339 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2340 {
2341   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2342   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2343   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2344   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2345   PetscErrorCode ierr;
2346   PetscInt       ridx,cidx,bs2=a->bs2;
2347   PetscBool      roworiented=a->roworiented;
2348   MatScalar      *ap,value,*aa=a->a,*bap;
2349 
2350   PetscFunctionBegin;
2351   if (v) PetscValidScalarPointer(v,6);
2352   for (k=0; k<m; k++) { /* loop over added rows */
2353     row  = im[k];
2354     brow = row/bs;
2355     if (row < 0) continue;
2356 #if defined(PETSC_USE_DEBUG)
2357     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2358 #endif
2359     rp   = aj + ai[brow];
2360     ap   = aa + bs2*ai[brow];
2361     rmax = imax[brow];
2362     nrow = ailen[brow];
2363     low  = 0;
2364     high = nrow;
2365     for (l=0; l<n; l++) { /* loop over added columns */
2366       if (in[l] < 0) continue;
2367 #if defined(PETSC_USE_DEBUG)
2368       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2369 #endif
2370       col = in[l]; bcol = col/bs;
2371       ridx = row % bs; cidx = col % bs;
2372       if (roworiented) {
2373         value = v[l + k*n];
2374       } else {
2375         value = v[k + l*m];
2376       }
2377       if (col <= lastcol) low = 0; else high = nrow;
2378       lastcol = col;
2379       while (high-low > 7) {
2380         t = (low+high)/2;
2381         if (rp[t] > bcol) high = t;
2382         else              low  = t;
2383       }
2384       for (i=low; i<high; i++) {
2385         if (rp[i] > bcol) break;
2386         if (rp[i] == bcol) {
2387           bap  = ap +  bs2*i + bs*cidx + ridx;
2388           if (is == ADD_VALUES) *bap += value;
2389           else                  *bap  = value;
2390           goto noinsert1;
2391         }
2392       }
2393       if (nonew == 1) goto noinsert1;
2394       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2395       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2396       N = nrow++ - 1; high++;
2397       /* shift up all the later entries in this row */
2398       for (ii=N; ii>=i; ii--) {
2399         rp[ii+1] = rp[ii];
2400         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2401       }
2402       if (N>=i) {
2403         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2404       }
2405       rp[i]                      = bcol;
2406       ap[bs2*i + bs*cidx + ridx] = value;
2407       a->nz++;
2408       noinsert1:;
2409       low = i;
2410     }
2411     ailen[brow] = nrow;
2412   }
2413   A->same_nonzero = PETSC_FALSE;
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2419 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2420 {
2421   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2422   Mat            outA;
2423   PetscErrorCode ierr;
2424   PetscBool      row_identity,col_identity;
2425 
2426   PetscFunctionBegin;
2427   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2428   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2429   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2430   if (!row_identity || !col_identity) {
2431     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2432   }
2433 
2434   outA            = inA;
2435   inA->factortype = MAT_FACTOR_LU;
2436 
2437   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2438 
2439   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2441   a->row = row;
2442   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2443   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2444   a->col = col;
2445 
2446   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2447   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2448    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2450 
2451   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2452   if (!a->solve_work) {
2453     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2454     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2455   }
2456   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2457 
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 EXTERN_C_BEGIN
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2464 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2465 {
2466   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2467   PetscInt    i,nz,mbs;
2468 
2469   PetscFunctionBegin;
2470   nz  = baij->maxnz;
2471   mbs = baij->mbs;
2472   for (i=0; i<nz; i++) {
2473     baij->j[i] = indices[i];
2474   }
2475   baij->nz = nz;
2476   for (i=0; i<mbs; i++) {
2477     baij->ilen[i] = baij->imax[i];
2478   }
2479   PetscFunctionReturn(0);
2480 }
2481 EXTERN_C_END
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2485 /*@
2486     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2487        in the matrix.
2488 
2489   Input Parameters:
2490 +  mat - the SeqBAIJ matrix
2491 -  indices - the column indices
2492 
2493   Level: advanced
2494 
2495   Notes:
2496     This can be called if you have precomputed the nonzero structure of the
2497   matrix and want to provide it to the matrix object to improve the performance
2498   of the MatSetValues() operation.
2499 
2500     You MUST have set the correct numbers of nonzeros per row in the call to
2501   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2502 
2503     MUST be called before any calls to MatSetValues();
2504 
2505 @*/
2506 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2512   PetscValidPointer(indices,2);
2513   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2519 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2520 {
2521   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2522   PetscErrorCode ierr;
2523   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2524   PetscReal      atmp;
2525   PetscScalar    *x,zero = 0.0;
2526   MatScalar      *aa;
2527   PetscInt       ncols,brow,krow,kcol;
2528 
2529   PetscFunctionBegin;
2530   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2531   bs   = A->rmap->bs;
2532   aa   = a->a;
2533   ai   = a->i;
2534   aj   = a->j;
2535   mbs  = a->mbs;
2536 
2537   ierr = VecSet(v,zero);CHKERRQ(ierr);
2538   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2539   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2540   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2541   for (i=0; i<mbs; i++) {
2542     ncols = ai[1] - ai[0]; ai++;
2543     brow  = bs*i;
2544     for (j=0; j<ncols; j++){
2545       for (kcol=0; kcol<bs; kcol++){
2546         for (krow=0; krow<bs; krow++){
2547           atmp = PetscAbsScalar(*aa);aa++;
2548           row = brow + krow;    /* row index */
2549           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2550           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2551         }
2552       }
2553       aj++;
2554     }
2555   }
2556   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatCopy_SeqBAIJ"
2562 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2563 {
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   /* If the two matrices have the same copy implementation, use fast copy. */
2568   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2569     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2570     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2571     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2572 
2573     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2574     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2575     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2576   } else {
2577     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2584 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2585 {
2586   PetscErrorCode ierr;
2587 
2588   PetscFunctionBegin;
2589   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 #undef __FUNCT__
2594 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2595 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2596 {
2597   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2598   PetscFunctionBegin;
2599   *array = a->a;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2605 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2606 {
2607   PetscFunctionBegin;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2613 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2614 {
2615   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2616   PetscErrorCode ierr;
2617   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2618   PetscBLASInt   one=1;
2619 
2620   PetscFunctionBegin;
2621   if (str == SAME_NONZERO_PATTERN) {
2622     PetscScalar alpha = a;
2623     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2624     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2625   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2626     if (y->xtoy && y->XtoY != X) {
2627       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2628       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2629     }
2630     if (!y->xtoy) { /* get xtoy */
2631       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2632       y->XtoY = X;
2633       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2634     }
2635     for (i=0; i<x->nz; i++) {
2636       j = 0;
2637       while (j < bs2){
2638         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2639         j++;
2640       }
2641     }
2642     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2643   } else {
2644     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2651 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2652 {
2653   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2654   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2655   MatScalar      *aa = a->a;
2656 
2657   PetscFunctionBegin;
2658   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2664 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2665 {
2666   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2667   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2668   MatScalar      *aa = a->a;
2669 
2670   PetscFunctionBegin;
2671   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2679 /*
2680     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2681 */
2682 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2683 {
2684   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2685   PetscErrorCode ierr;
2686   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2687   PetscInt       nz = a->i[m],row,*jj,mr,col;
2688 
2689   PetscFunctionBegin;
2690   *nn = n;
2691   if (!ia) PetscFunctionReturn(0);
2692   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2693   else {
2694     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2695     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2697     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2698     jj = a->j;
2699     for (i=0; i<nz; i++) {
2700       collengths[jj[i]]++;
2701     }
2702     cia[0] = oshift;
2703     for (i=0; i<n; i++) {
2704       cia[i+1] = cia[i] + collengths[i];
2705     }
2706     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2707     jj   = a->j;
2708     for (row=0; row<m; row++) {
2709       mr = a->i[row+1] - a->i[row];
2710       for (i=0; i<mr; i++) {
2711         col = *jj++;
2712         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2713       }
2714     }
2715     ierr = PetscFree(collengths);CHKERRQ(ierr);
2716     *ia = cia; *ja = cja;
2717   }
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2723 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2724 {
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   if (!ia) PetscFunctionReturn(0);
2729   ierr = PetscFree(*ia);CHKERRQ(ierr);
2730   ierr = PetscFree(*ja);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2736 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2737 {
2738   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2739   PetscErrorCode ierr;
2740   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2741   PetscScalar    dx,*y,*xx,*w3_array;
2742   PetscScalar    *vscale_array;
2743   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2744   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2745   void           *fctx = coloring->fctx;
2746   PetscBool      flg = PETSC_FALSE;
2747   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2748   Vec            x1_tmp;
2749 
2750   PetscFunctionBegin;
2751   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2752   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2753   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2754   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2755 
2756   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2757   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2758   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2759   if (flg) {
2760     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2761   } else {
2762     PetscBool  assembled;
2763     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2764     if (assembled) {
2765       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2766     }
2767   }
2768 
2769   x1_tmp = x1;
2770   if (!coloring->vscale){
2771     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2772   }
2773 
2774   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2775     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2776   }
2777   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2778 
2779   /* Set w1 = F(x1) */
2780   if (!coloring->fset) {
2781     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2782     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2783     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2784   } else {
2785     coloring->fset = PETSC_FALSE;
2786   }
2787 
2788   if (!coloring->w3) {
2789     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2790     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2791   }
2792   w3 = coloring->w3;
2793 
2794     CHKMEMQ;
2795     /* Compute all the local scale factors, including ghost points */
2796   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2797   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2798   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2799   if (ctype == IS_COLORING_GHOSTED){
2800     col_start = 0; col_end = N;
2801   } else if (ctype == IS_COLORING_GLOBAL){
2802     xx = xx - start;
2803     vscale_array = vscale_array - start;
2804     col_start = start; col_end = N + start;
2805   }    CHKMEMQ;
2806   for (col=col_start; col<col_end; col++){
2807     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2808     if (coloring->htype[0] == 'w') {
2809       dx = 1.0 + unorm;
2810     } else {
2811       dx  = xx[col];
2812     }
2813     if (dx == (PetscScalar)0.0) dx = 1.0;
2814 #if !defined(PETSC_USE_COMPLEX)
2815     if (dx < umin && dx >= 0.0)      dx = umin;
2816     else if (dx < 0.0 && dx > -umin) dx = -umin;
2817 #else
2818     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2819     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2820 #endif
2821     dx               *= epsilon;
2822     vscale_array[col] = (PetscScalar)1.0/dx;
2823   }     CHKMEMQ;
2824   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2825   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2826   if (ctype == IS_COLORING_GLOBAL){
2827     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2828     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2829   }
2830   CHKMEMQ;
2831   if (coloring->vscaleforrow) {
2832     vscaleforrow = coloring->vscaleforrow;
2833   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2834 
2835   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2836   /*
2837     Loop over each color
2838   */
2839   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2840   for (k=0; k<coloring->ncolors; k++) {
2841     coloring->currentcolor = k;
2842     for (i=0; i<bs; i++) {
2843       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2844       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2845       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2846       /*
2847 	Loop over each column associated with color
2848 	adding the perturbation to the vector w3.
2849       */
2850       for (l=0; l<coloring->ncolumns[k]; l++) {
2851 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2852 	if (coloring->htype[0] == 'w') {
2853 	  dx = 1.0 + unorm;
2854 	} else {
2855 	  dx  = xx[col];
2856 	}
2857 	if (dx == (PetscScalar)0.0) dx = 1.0;
2858 #if !defined(PETSC_USE_COMPLEX)
2859 	if (dx < umin && dx >= 0.0)      dx = umin;
2860 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2861 #else
2862 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2863 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2864 #endif
2865 	dx            *= epsilon;
2866 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2867 	w3_array[col] += dx;
2868       }
2869       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2870       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2871 
2872       /*
2873 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2874 	w2 = F(x1 + dx) - F(x1)
2875       */
2876       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2877       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2878       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2879       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2880 
2881       /*
2882 	Loop over rows of vector, putting results into Jacobian matrix
2883       */
2884       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2885       for (l=0; l<coloring->nrows[k]; l++) {
2886 	row    = bs*coloring->rows[k][l];             /* local row index */
2887 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2888         for (j=0; j<bs; j++) {
2889   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2890           srows[j]  = row + start + j;
2891         }
2892 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2893       }
2894       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2895     }
2896   } /* endof for each color */
2897   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2898   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2899   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2900   ierr = PetscFree(srows);CHKERRQ(ierr);
2901 
2902   coloring->currentcolor = -1;
2903   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2904   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2905   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2906   PetscFunctionReturn(0);
2907 }
2908 
2909 /* -------------------------------------------------------------------*/
2910 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2911        MatGetRow_SeqBAIJ,
2912        MatRestoreRow_SeqBAIJ,
2913        MatMult_SeqBAIJ_N,
2914 /* 4*/ MatMultAdd_SeqBAIJ_N,
2915        MatMultTranspose_SeqBAIJ,
2916        MatMultTransposeAdd_SeqBAIJ,
2917        0,
2918        0,
2919        0,
2920 /*10*/ 0,
2921        MatLUFactor_SeqBAIJ,
2922        0,
2923        0,
2924        MatTranspose_SeqBAIJ,
2925 /*15*/ MatGetInfo_SeqBAIJ,
2926        MatEqual_SeqBAIJ,
2927        MatGetDiagonal_SeqBAIJ,
2928        MatDiagonalScale_SeqBAIJ,
2929        MatNorm_SeqBAIJ,
2930 /*20*/ 0,
2931        MatAssemblyEnd_SeqBAIJ,
2932        MatSetOption_SeqBAIJ,
2933        MatZeroEntries_SeqBAIJ,
2934 /*24*/ MatZeroRows_SeqBAIJ,
2935        0,
2936        0,
2937        0,
2938        0,
2939 /*29*/ MatSetUp_SeqBAIJ,
2940        0,
2941        0,
2942        0,
2943        0,
2944 /*34*/ MatDuplicate_SeqBAIJ,
2945        0,
2946        0,
2947        MatILUFactor_SeqBAIJ,
2948        0,
2949 /*39*/ MatAXPY_SeqBAIJ,
2950        MatGetSubMatrices_SeqBAIJ,
2951        MatIncreaseOverlap_SeqBAIJ,
2952        MatGetValues_SeqBAIJ,
2953        MatCopy_SeqBAIJ,
2954 /*44*/ 0,
2955        MatScale_SeqBAIJ,
2956        0,
2957        0,
2958        MatZeroRowsColumns_SeqBAIJ,
2959 /*49*/ 0,
2960        MatGetRowIJ_SeqBAIJ,
2961        MatRestoreRowIJ_SeqBAIJ,
2962        MatGetColumnIJ_SeqBAIJ,
2963        MatRestoreColumnIJ_SeqBAIJ,
2964 /*54*/ MatFDColoringCreate_SeqAIJ,
2965        0,
2966        0,
2967        0,
2968        MatSetValuesBlocked_SeqBAIJ,
2969 /*59*/ MatGetSubMatrix_SeqBAIJ,
2970        MatDestroy_SeqBAIJ,
2971        MatView_SeqBAIJ,
2972        0,
2973        0,
2974 /*64*/ 0,
2975        0,
2976        0,
2977        0,
2978        0,
2979 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2980        0,
2981        MatConvert_Basic,
2982        0,
2983        0,
2984 /*74*/ 0,
2985        MatFDColoringApply_BAIJ,
2986        0,
2987        0,
2988        0,
2989 /*79*/ 0,
2990        0,
2991        0,
2992        0,
2993        MatLoad_SeqBAIJ,
2994 /*84*/ 0,
2995        0,
2996        0,
2997        0,
2998        0,
2999 /*89*/ 0,
3000        0,
3001        0,
3002        0,
3003        0,
3004 /*94*/ 0,
3005        0,
3006        0,
3007        0,
3008        0,
3009 /*99*/0,
3010        0,
3011        0,
3012        0,
3013        0,
3014 /*104*/0,
3015        MatRealPart_SeqBAIJ,
3016        MatImaginaryPart_SeqBAIJ,
3017        0,
3018        0,
3019 /*109*/0,
3020        0,
3021        0,
3022        0,
3023        MatMissingDiagonal_SeqBAIJ,
3024 /*114*/0,
3025        0,
3026        0,
3027        0,
3028        0,
3029 /*119*/0,
3030        0,
3031        MatMultHermitianTranspose_SeqBAIJ,
3032        MatMultHermitianTransposeAdd_SeqBAIJ,
3033        0,
3034 /*124*/0,
3035        0,
3036        MatInvertBlockDiagonal_SeqBAIJ
3037 };
3038 
3039 EXTERN_C_BEGIN
3040 #undef __FUNCT__
3041 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3042 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3043 {
3044   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3045   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3046   PetscErrorCode ierr;
3047 
3048   PetscFunctionBegin;
3049   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3050 
3051   /* allocate space for values if not already there */
3052   if (!aij->saved_values) {
3053     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3054     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3055   }
3056 
3057   /* copy values over */
3058   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3059   PetscFunctionReturn(0);
3060 }
3061 EXTERN_C_END
3062 
3063 EXTERN_C_BEGIN
3064 #undef __FUNCT__
3065 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3066 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3067 {
3068   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3069   PetscErrorCode ierr;
3070   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3071 
3072   PetscFunctionBegin;
3073   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3074   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3075 
3076   /* copy values over */
3077   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3078   PetscFunctionReturn(0);
3079 }
3080 EXTERN_C_END
3081 
3082 EXTERN_C_BEGIN
3083 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3084 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3085 EXTERN_C_END
3086 
3087 EXTERN_C_BEGIN
3088 #undef __FUNCT__
3089 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3090 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3091 {
3092   Mat_SeqBAIJ    *b;
3093   PetscErrorCode ierr;
3094   PetscInt       i,mbs,nbs,bs2;
3095   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3096 
3097   PetscFunctionBegin;
3098   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3099   if (nz == MAT_SKIP_ALLOCATION) {
3100     skipallocation = PETSC_TRUE;
3101     nz             = 0;
3102   }
3103 
3104   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3105   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3106   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3107   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3108   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3109 
3110   B->preallocated = PETSC_TRUE;
3111 
3112   mbs  = B->rmap->n/bs;
3113   nbs  = B->cmap->n/bs;
3114   bs2  = bs*bs;
3115 
3116   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3117 
3118   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3119   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3120   if (nnz) {
3121     for (i=0; i<mbs; i++) {
3122       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3123       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3124     }
3125   }
3126 
3127   b       = (Mat_SeqBAIJ*)B->data;
3128   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3129     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3130   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3131 
3132   if (!flg) {
3133     switch (bs) {
3134     case 1:
3135       B->ops->mult            = MatMult_SeqBAIJ_1;
3136       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3137       B->ops->sor             = MatSOR_SeqBAIJ_1;
3138       break;
3139     case 2:
3140       B->ops->mult            = MatMult_SeqBAIJ_2;
3141       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3142       B->ops->sor             = MatSOR_SeqBAIJ_2;
3143       break;
3144     case 3:
3145       B->ops->mult            = MatMult_SeqBAIJ_3;
3146       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3147       B->ops->sor             = MatSOR_SeqBAIJ_3;
3148       break;
3149     case 4:
3150       B->ops->mult            = MatMult_SeqBAIJ_4;
3151       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3152       B->ops->sor             = MatSOR_SeqBAIJ_4;
3153       break;
3154     case 5:
3155       B->ops->mult            = MatMult_SeqBAIJ_5;
3156       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3157       B->ops->sor             = MatSOR_SeqBAIJ_5;
3158       break;
3159     case 6:
3160       B->ops->mult            = MatMult_SeqBAIJ_6;
3161       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3162       B->ops->sor             = MatSOR_SeqBAIJ_6;
3163       break;
3164     case 7:
3165       B->ops->mult            = MatMult_SeqBAIJ_7;
3166       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3167       B->ops->sor             = MatSOR_SeqBAIJ_7;
3168       break;
3169     case 15:
3170       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3171       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3172       B->ops->sor             = MatSOR_SeqBAIJ_N;
3173       break;
3174     default:
3175       B->ops->mult            = MatMult_SeqBAIJ_N;
3176       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3177       B->ops->sor             = MatSOR_SeqBAIJ_N;
3178       break;
3179     }
3180   }
3181   b->mbs       = mbs;
3182   b->nbs       = nbs;
3183   if (!skipallocation) {
3184     if (!b->imax) {
3185       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3186       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3187       b->free_imax_ilen = PETSC_TRUE;
3188     }
3189     /* b->ilen will count nonzeros in each block row so far. */
3190     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3191     if (!nnz) {
3192       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3193       else if (nz < 0) nz = 1;
3194       for (i=0; i<mbs; i++) b->imax[i] = nz;
3195       nz = nz*mbs;
3196     } else {
3197       nz = 0;
3198       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3199     }
3200 
3201     /* allocate the matrix space */
3202     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3203     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3204     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3205     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3206     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3207     b->singlemalloc = PETSC_TRUE;
3208     b->i[0] = 0;
3209     for (i=1; i<mbs+1; i++) {
3210       b->i[i] = b->i[i-1] + b->imax[i-1];
3211     }
3212     b->free_a     = PETSC_TRUE;
3213     b->free_ij    = PETSC_TRUE;
3214   } else {
3215     b->free_a     = PETSC_FALSE;
3216     b->free_ij    = PETSC_FALSE;
3217   }
3218 
3219   b->bs2              = bs2;
3220   b->mbs              = mbs;
3221   b->nz               = 0;
3222   b->maxnz            = nz;
3223   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3224   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3225   PetscFunctionReturn(0);
3226 }
3227 EXTERN_C_END
3228 
3229 EXTERN_C_BEGIN
3230 #undef __FUNCT__
3231 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3232 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3233 {
3234   PetscInt       i,m,nz,nz_max=0,*nnz;
3235   PetscScalar    *values=0;
3236   PetscErrorCode ierr;
3237 
3238   PetscFunctionBegin;
3239   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3240   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3241   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3242   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3243   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3244   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3245   m = B->rmap->n/bs;
3246 
3247   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3248   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3249   for (i=0; i<m; i++) {
3250     nz = ii[i+1]- ii[i];
3251     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3252     nz_max = PetscMax(nz_max, nz);
3253     nnz[i] = nz;
3254   }
3255   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3256   ierr = PetscFree(nnz);CHKERRQ(ierr);
3257 
3258   values = (PetscScalar*)V;
3259   if (!values) {
3260     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3261     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3262   }
3263   for (i=0; i<m; i++) {
3264     PetscInt          ncols  = ii[i+1] - ii[i];
3265     const PetscInt    *icols = jj + ii[i];
3266     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3267     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3268   }
3269   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3270   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3271   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3272   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3273   PetscFunctionReturn(0);
3274 }
3275 EXTERN_C_END
3276 
3277 
3278 EXTERN_C_BEGIN
3279 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3280 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3281 #if defined(PETSC_HAVE_MUMPS)
3282 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3283 #endif
3284 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3285 EXTERN_C_END
3286 
3287 /*MC
3288    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3289    block sparse compressed row format.
3290 
3291    Options Database Keys:
3292 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3293 
3294   Level: beginner
3295 
3296 .seealso: MatCreateSeqBAIJ()
3297 M*/
3298 
3299 EXTERN_C_BEGIN
3300 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3301 EXTERN_C_END
3302 
3303 EXTERN_C_BEGIN
3304 #undef __FUNCT__
3305 #define __FUNCT__ "MatCreate_SeqBAIJ"
3306 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3307 {
3308   PetscErrorCode ierr;
3309   PetscMPIInt    size;
3310   Mat_SeqBAIJ    *b;
3311 
3312   PetscFunctionBegin;
3313   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3314   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3315 
3316   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3317   B->data = (void*)b;
3318   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3319   b->row                   = 0;
3320   b->col                   = 0;
3321   b->icol                  = 0;
3322   b->reallocs              = 0;
3323   b->saved_values          = 0;
3324 
3325   b->roworiented           = PETSC_TRUE;
3326   b->nonew                 = 0;
3327   b->diag                  = 0;
3328   b->solve_work            = 0;
3329   b->mult_work             = 0;
3330   B->spptr                 = 0;
3331   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3332   b->keepnonzeropattern    = PETSC_FALSE;
3333   b->xtoy                  = 0;
3334   b->XtoY                  = 0;
3335   B->same_nonzero          = PETSC_FALSE;
3336 
3337   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3338                                      "MatGetFactorAvailable_seqbaij_petsc",
3339                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3340   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3341                                      "MatGetFactor_seqbaij_petsc",
3342                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3343   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3344                                      "MatGetFactor_seqbaij_bstrm",
3345                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3346 #if defined(PETSC_HAVE_MUMPS)
3347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3348 #endif
3349   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3350                                      "MatInvertBlockDiagonal_SeqBAIJ",
3351                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3353                                      "MatStoreValues_SeqBAIJ",
3354                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3356                                      "MatRetrieveValues_SeqBAIJ",
3357                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3359                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3360                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3362                                      "MatConvert_SeqBAIJ_SeqAIJ",
3363                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3364   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3365                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3366                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3368                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3369                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3370   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3371                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3372                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3374                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3375                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3377                                      "MatIsTranspose_SeqBAIJ",
3378                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3379   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3380   PetscFunctionReturn(0);
3381 }
3382 EXTERN_C_END
3383 
3384 #undef __FUNCT__
3385 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3386 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3387 {
3388   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3389   PetscErrorCode ierr;
3390   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3391 
3392   PetscFunctionBegin;
3393   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3394 
3395   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3396     c->imax = a->imax;
3397     c->ilen = a->ilen;
3398     c->free_imax_ilen = PETSC_FALSE;
3399   } else {
3400     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3401     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3402     for (i=0; i<mbs; i++) {
3403       c->imax[i] = a->imax[i];
3404       c->ilen[i] = a->ilen[i];
3405     }
3406     c->free_imax_ilen = PETSC_TRUE;
3407   }
3408 
3409   /* allocate the matrix space */
3410   if (mallocmatspace){
3411     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3412       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3413       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3414       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3415       c->i            = a->i;
3416       c->j            = a->j;
3417       c->singlemalloc = PETSC_FALSE;
3418       c->free_a       = PETSC_TRUE;
3419       c->free_ij      = PETSC_FALSE;
3420       c->parent       = A;
3421       C->preallocated = PETSC_TRUE;
3422       C->assembled    = PETSC_TRUE;
3423       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3424       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3425       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3426     } else {
3427       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3428       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3429       c->singlemalloc = PETSC_TRUE;
3430       c->free_a       = PETSC_TRUE;
3431       c->free_ij      = PETSC_TRUE;
3432       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3433       if (mbs > 0) {
3434 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3435 	if (cpvalues == MAT_COPY_VALUES) {
3436 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3437 	} else {
3438 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3439 	}
3440       }
3441       C->preallocated = PETSC_TRUE;
3442       C->assembled    = PETSC_TRUE;
3443     }
3444   }
3445 
3446   c->roworiented = a->roworiented;
3447   c->nonew       = a->nonew;
3448   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3449   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3450   c->bs2         = a->bs2;
3451   c->mbs         = a->mbs;
3452   c->nbs         = a->nbs;
3453 
3454   if (a->diag) {
3455     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3456       c->diag      = a->diag;
3457       c->free_diag = PETSC_FALSE;
3458     } else {
3459       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3460       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3461       for (i=0; i<mbs; i++) {
3462         c->diag[i] = a->diag[i];
3463       }
3464       c->free_diag = PETSC_TRUE;
3465     }
3466   } else c->diag        = 0;
3467   c->nz                 = a->nz;
3468   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3469   c->solve_work         = 0;
3470   c->mult_work          = 0;
3471 
3472   c->compressedrow.use     = a->compressedrow.use;
3473   c->compressedrow.nrows   = a->compressedrow.nrows;
3474   c->compressedrow.check   = a->compressedrow.check;
3475   if (a->compressedrow.use){
3476     i = a->compressedrow.nrows;
3477     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3478     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3479     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3480     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3481   } else {
3482     c->compressedrow.use    = PETSC_FALSE;
3483     c->compressedrow.i      = PETSC_NULL;
3484     c->compressedrow.rindex = PETSC_NULL;
3485   }
3486   C->same_nonzero = A->same_nonzero;
3487   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3488   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3489   PetscFunctionReturn(0);
3490 }
3491 
3492 #undef __FUNCT__
3493 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3494 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3495 {
3496     PetscErrorCode ierr;
3497 
3498   PetscFunctionBegin;
3499   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3500   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3501   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3502   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 #undef __FUNCT__
3507 #define __FUNCT__ "MatLoad_SeqBAIJ"
3508 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3509 {
3510   Mat_SeqBAIJ    *a;
3511   PetscErrorCode ierr;
3512   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3513   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3514   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3515   PetscInt       *masked,nmask,tmp,bs2,ishift;
3516   PetscMPIInt    size;
3517   int            fd;
3518   PetscScalar    *aa;
3519   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3520 
3521   PetscFunctionBegin;
3522   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3523   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3524   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3525   bs2  = bs*bs;
3526 
3527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3528   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3529   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3530   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3531   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3532   M = header[1]; N = header[2]; nz = header[3];
3533 
3534   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3535   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3536 
3537   /*
3538      This code adds extra rows to make sure the number of rows is
3539     divisible by the blocksize
3540   */
3541   mbs        = M/bs;
3542   extra_rows = bs - M + bs*(mbs);
3543   if (extra_rows == bs) extra_rows = 0;
3544   else                  mbs++;
3545   if (extra_rows) {
3546     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3547   }
3548 
3549   /* Set global sizes if not already set */
3550   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3551     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3552   } else { /* Check if the matrix global sizes are correct */
3553     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3554     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3555       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3556     }
3557     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3558   }
3559 
3560   /* read in row lengths */
3561   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3562   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3563   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3564 
3565   /* read in column indices */
3566   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3567   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3568   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3569 
3570   /* loop over row lengths determining block row lengths */
3571   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3572   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3573   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3574   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3575   rowcount = 0;
3576   nzcount = 0;
3577   for (i=0; i<mbs; i++) {
3578     nmask = 0;
3579     for (j=0; j<bs; j++) {
3580       kmax = rowlengths[rowcount];
3581       for (k=0; k<kmax; k++) {
3582         tmp = jj[nzcount++]/bs;
3583         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3584       }
3585       rowcount++;
3586     }
3587     browlengths[i] += nmask;
3588     /* zero out the mask elements we set */
3589     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3590   }
3591 
3592   /* Do preallocation  */
3593   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3594   a = (Mat_SeqBAIJ*)newmat->data;
3595 
3596   /* set matrix "i" values */
3597   a->i[0] = 0;
3598   for (i=1; i<= mbs; i++) {
3599     a->i[i]      = a->i[i-1] + browlengths[i-1];
3600     a->ilen[i-1] = browlengths[i-1];
3601   }
3602   a->nz         = 0;
3603   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3604 
3605   /* read in nonzero values */
3606   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3607   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3608   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3609 
3610   /* set "a" and "j" values into matrix */
3611   nzcount = 0; jcount = 0;
3612   for (i=0; i<mbs; i++) {
3613     nzcountb = nzcount;
3614     nmask    = 0;
3615     for (j=0; j<bs; j++) {
3616       kmax = rowlengths[i*bs+j];
3617       for (k=0; k<kmax; k++) {
3618         tmp = jj[nzcount++]/bs;
3619 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3620       }
3621     }
3622     /* sort the masked values */
3623     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3624 
3625     /* set "j" values into matrix */
3626     maskcount = 1;
3627     for (j=0; j<nmask; j++) {
3628       a->j[jcount++]  = masked[j];
3629       mask[masked[j]] = maskcount++;
3630     }
3631     /* set "a" values into matrix */
3632     ishift = bs2*a->i[i];
3633     for (j=0; j<bs; j++) {
3634       kmax = rowlengths[i*bs+j];
3635       for (k=0; k<kmax; k++) {
3636         tmp       = jj[nzcountb]/bs ;
3637         block     = mask[tmp] - 1;
3638         point     = jj[nzcountb] - bs*tmp;
3639         idx       = ishift + bs2*block + j + bs*point;
3640         a->a[idx] = (MatScalar)aa[nzcountb++];
3641       }
3642     }
3643     /* zero out the mask elements we set */
3644     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3645   }
3646   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3647 
3648   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3649   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3650   ierr = PetscFree(aa);CHKERRQ(ierr);
3651   ierr = PetscFree(jj);CHKERRQ(ierr);
3652   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3653 
3654   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 #undef __FUNCT__
3660 #define __FUNCT__ "MatCreateSeqBAIJ"
3661 /*@C
3662    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3663    compressed row) format.  For good matrix assembly performance the
3664    user should preallocate the matrix storage by setting the parameter nz
3665    (or the array nnz).  By setting these parameters accurately, performance
3666    during matrix assembly can be increased by more than a factor of 50.
3667 
3668    Collective on MPI_Comm
3669 
3670    Input Parameters:
3671 +  comm - MPI communicator, set to PETSC_COMM_SELF
3672 .  bs - size of block
3673 .  m - number of rows
3674 .  n - number of columns
3675 .  nz - number of nonzero blocks  per block row (same for all rows)
3676 -  nnz - array containing the number of nonzero blocks in the various block rows
3677          (possibly different for each block row) or PETSC_NULL
3678 
3679    Output Parameter:
3680 .  A - the matrix
3681 
3682    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3683    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3684    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3685 
3686    Options Database Keys:
3687 .   -mat_no_unroll - uses code that does not unroll the loops in the
3688                      block calculations (much slower)
3689 .    -mat_block_size - size of the blocks to use
3690 
3691    Level: intermediate
3692 
3693    Notes:
3694    The number of rows and columns must be divisible by blocksize.
3695 
3696    If the nnz parameter is given then the nz parameter is ignored
3697 
3698    A nonzero block is any block that as 1 or more nonzeros in it
3699 
3700    The block AIJ format is fully compatible with standard Fortran 77
3701    storage.  That is, the stored row and column indices can begin at
3702    either one (as in Fortran) or zero.  See the users' manual for details.
3703 
3704    Specify the preallocated storage with either nz or nnz (not both).
3705    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3706    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3707    matrices.
3708 
3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3710 @*/
3711 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3712 {
3713   PetscErrorCode ierr;
3714 
3715   PetscFunctionBegin;
3716   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3717   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3718   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3719   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3720   PetscFunctionReturn(0);
3721 }
3722 
3723 #undef __FUNCT__
3724 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3725 /*@C
3726    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3727    per row in the matrix. For good matrix assembly performance the
3728    user should preallocate the matrix storage by setting the parameter nz
3729    (or the array nnz).  By setting these parameters accurately, performance
3730    during matrix assembly can be increased by more than a factor of 50.
3731 
3732    Collective on MPI_Comm
3733 
3734    Input Parameters:
3735 +  A - the matrix
3736 .  bs - size of block
3737 .  nz - number of block nonzeros per block row (same for all rows)
3738 -  nnz - array containing the number of block nonzeros in the various block rows
3739          (possibly different for each block row) or PETSC_NULL
3740 
3741    Options Database Keys:
3742 .   -mat_no_unroll - uses code that does not unroll the loops in the
3743                      block calculations (much slower)
3744 .    -mat_block_size - size of the blocks to use
3745 
3746    Level: intermediate
3747 
3748    Notes:
3749    If the nnz parameter is given then the nz parameter is ignored
3750 
3751    You can call MatGetInfo() to get information on how effective the preallocation was;
3752    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3753    You can also run with the option -info and look for messages with the string
3754    malloc in them to see if additional memory allocation was needed.
3755 
3756    The block AIJ format is fully compatible with standard Fortran 77
3757    storage.  That is, the stored row and column indices can begin at
3758    either one (as in Fortran) or zero.  See the users' manual for details.
3759 
3760    Specify the preallocated storage with either nz or nnz (not both).
3761    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3762    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3763 
3764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3765 @*/
3766 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3767 {
3768   PetscErrorCode ierr;
3769 
3770   PetscFunctionBegin;
3771   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3772   PetscValidType(B,1);
3773   PetscValidLogicalCollectiveInt(B,bs,2);
3774   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3775   PetscFunctionReturn(0);
3776 }
3777 
3778 #undef __FUNCT__
3779 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3780 /*@C
3781    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3782    (the default sequential PETSc format).
3783 
3784    Collective on MPI_Comm
3785 
3786    Input Parameters:
3787 +  A - the matrix
3788 .  i - the indices into j for the start of each local row (starts with zero)
3789 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3790 -  v - optional values in the matrix
3791 
3792    Level: developer
3793 
3794 .keywords: matrix, aij, compressed row, sparse
3795 
3796 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3797 @*/
3798 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3799 {
3800   PetscErrorCode ierr;
3801 
3802   PetscFunctionBegin;
3803   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3804   PetscValidType(B,1);
3805   PetscValidLogicalCollectiveInt(B,bs,2);
3806   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3807   PetscFunctionReturn(0);
3808 }
3809 
3810 
3811 #undef __FUNCT__
3812 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3813 /*@
3814      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3815 
3816      Collective on MPI_Comm
3817 
3818    Input Parameters:
3819 +  comm - must be an MPI communicator of size 1
3820 .  bs - size of block
3821 .  m - number of rows
3822 .  n - number of columns
3823 .  i - row indices
3824 .  j - column indices
3825 -  a - matrix values
3826 
3827    Output Parameter:
3828 .  mat - the matrix
3829 
3830    Level: advanced
3831 
3832    Notes:
3833        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3834     once the matrix is destroyed
3835 
3836        You cannot set new nonzero locations into this matrix, that will generate an error.
3837 
3838        The i and j indices are 0 based
3839 
3840        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3841 
3842 
3843 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3844 
3845 @*/
3846 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3847 {
3848   PetscErrorCode ierr;
3849   PetscInt       ii;
3850   Mat_SeqBAIJ    *baij;
3851 
3852   PetscFunctionBegin;
3853   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3854   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3855 
3856   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3857   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3858   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3859   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3860   baij = (Mat_SeqBAIJ*)(*mat)->data;
3861   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3862   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3863 
3864   baij->i = i;
3865   baij->j = j;
3866   baij->a = a;
3867   baij->singlemalloc = PETSC_FALSE;
3868   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3869   baij->free_a       = PETSC_FALSE;
3870   baij->free_ij      = PETSC_FALSE;
3871 
3872   for (ii=0; ii<m; ii++) {
3873     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3874 #if defined(PETSC_USE_DEBUG)
3875     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3876 #endif
3877   }
3878 #if defined(PETSC_USE_DEBUG)
3879   for (ii=0; ii<baij->i[m]; ii++) {
3880     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3881     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3882   }
3883 #endif
3884 
3885   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3886   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3887   PetscFunctionReturn(0);
3888 }
3889