xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 0e9bae810fdaeb60e2713eaa8ddb89f42e079fd1)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       *tia, *tja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (ja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353 	    }
1354           }
1355         }
1356       }
1357     }
1358 
1359     n     *= bs;
1360     nz *= bs*bs;
1361     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362       ierr = PetscFree(tia);CHKERRQ(ierr);
1363       ierr = PetscFree(tja);CHKERRQ(ierr);
1364     }
1365   } else if (oshift == 1) {
1366     if (symmetric) {
1367       PetscInt nz = tia[A->rmap->n/bs];
1368       /*  add 1 to i and j indices */
1369       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370       *ia = tia;
1371       if (ja) {
1372 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373         *ja = tja;
1374       }
1375     } else {
1376       PetscInt nz = a->i[A->rmap->n/bs];
1377       /* malloc space and  add 1 to i and j indices */
1378       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1379       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380       if (ja) {
1381 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1382 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383       }
1384     }
1385   } else {
1386     *ia = tia;
1387     if (ja) *ja = tja;
1388   }
1389 
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!ia) PetscFunctionReturn(0);
1401   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402     ierr = PetscFree(*ia);CHKERRQ(ierr);
1403     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411 {
1412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416 #if defined(PETSC_USE_LOG)
1417   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418 #endif
1419   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1420   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1421   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1422   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1423   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1424   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1425   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1426   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1427   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1428   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1429   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1430   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1431   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1432 
1433   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1434   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1435   ierr = PetscFree(A->data);CHKERRQ(ierr);
1436 
1437   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1454 {
1455   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   switch (op) {
1460   case MAT_ROW_ORIENTED:
1461     a->roworiented    = flg;
1462     break;
1463   case MAT_KEEP_NONZERO_PATTERN:
1464     a->keepnonzeropattern = flg;
1465     break;
1466   case MAT_NEW_NONZERO_LOCATIONS:
1467     a->nonew          = (flg ? 0 : 1);
1468     break;
1469   case MAT_NEW_NONZERO_LOCATION_ERR:
1470     a->nonew          = (flg ? -1 : 0);
1471     break;
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473     a->nonew          = (flg ? -2 : 0);
1474     break;
1475   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476     a->nounused       = (flg ? -1 : 0);
1477     break;
1478   case MAT_CHECK_COMPRESSED_ROW:
1479     a->compressedrow.check = flg;
1480     break;
1481   case MAT_NEW_DIAGONALS:
1482   case MAT_IGNORE_OFF_PROC_ENTRIES:
1483   case MAT_USE_HASH_TABLE:
1484     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1485     break;
1486   case MAT_SYMMETRIC:
1487   case MAT_STRUCTURALLY_SYMMETRIC:
1488   case MAT_HERMITIAN:
1489   case MAT_SYMMETRY_ETERNAL:
1490     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1491     break;
1492   default:
1493     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1500 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1501 {
1502   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode ierr;
1504   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1505   MatScalar      *aa,*aa_i;
1506   PetscScalar    *v_i;
1507 
1508   PetscFunctionBegin;
1509   bs  = A->rmap->bs;
1510   ai  = a->i;
1511   aj  = a->j;
1512   aa  = a->a;
1513   bs2 = a->bs2;
1514 
1515   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1516 
1517   bn  = row/bs;   /* Block number */
1518   bp  = row % bs; /* Block Position */
1519   M   = ai[bn+1] - ai[bn];
1520   *nz = bs*M;
1521 
1522   if (v) {
1523     *v = 0;
1524     if (*nz) {
1525       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1526       for (i=0; i<M; i++) { /* for each block in the block row */
1527         v_i  = *v + i*bs;
1528         aa_i = aa + bs2*(ai[bn] + i);
1529         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1530       }
1531     }
1532   }
1533 
1534   if (idx) {
1535     *idx = 0;
1536     if (*nz) {
1537       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1538       for (i=0; i<M; i++) { /* for each block in the block row */
1539         idx_i = *idx + i*bs;
1540         itmp  = bs*aj[ai[bn] + i];
1541         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1542       }
1543     }
1544   }
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1550 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551 {
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1556   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1564 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1565 {
1566   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1567   Mat            C;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1570   PetscInt       *rows,*cols,bs2=a->bs2;
1571   MatScalar      *array;
1572 
1573   PetscFunctionBegin;
1574   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1576     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1577     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1578 
1579     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1580     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1581     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1582     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1583     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1584     ierr = PetscFree(col);CHKERRQ(ierr);
1585   } else {
1586     C = *B;
1587   }
1588 
1589   array = a->a;
1590   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1591   for (i=0; i<mbs; i++) {
1592     cols[0] = i*bs;
1593     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1594     len = ai[i+1] - ai[i];
1595     for (j=0; j<len; j++) {
1596       rows[0] = (*aj++)*bs;
1597       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1598       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1599       array += bs2;
1600     }
1601   }
1602   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1603 
1604   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1605   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606 
1607   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1608     *B = C;
1609   } else {
1610     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1618 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1619 {
1620   PetscErrorCode ierr;
1621   Mat            Btrans;
1622 
1623   PetscFunctionBegin;
1624   *f = PETSC_FALSE;
1625   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1626   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1627   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1628   PetscFunctionReturn(0);
1629 }
1630 EXTERN_C_END
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1634 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1635 {
1636   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1637   PetscErrorCode ierr;
1638   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1639   int            fd;
1640   PetscScalar    *aa;
1641   FILE           *file;
1642 
1643   PetscFunctionBegin;
1644   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1645   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1646   col_lens[0] = MAT_FILE_CLASSID;
1647 
1648   col_lens[1] = A->rmap->N;
1649   col_lens[2] = A->cmap->n;
1650   col_lens[3] = a->nz*bs2;
1651 
1652   /* store lengths of each row and write (including header) to file */
1653   count = 0;
1654   for (i=0; i<a->mbs; i++) {
1655     for (j=0; j<bs; j++) {
1656       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1657     }
1658   }
1659   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1660   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1661 
1662   /* store column indices (zero start index) */
1663   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1664   count = 0;
1665   for (i=0; i<a->mbs; i++) {
1666     for (j=0; j<bs; j++) {
1667       for (k=a->i[i]; k<a->i[i+1]; k++) {
1668         for (l=0; l<bs; l++) {
1669           jj[count++] = bs*a->j[k] + l;
1670         }
1671       }
1672     }
1673   }
1674   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1675   ierr = PetscFree(jj);CHKERRQ(ierr);
1676 
1677   /* store nonzero values */
1678   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1679   count = 0;
1680   for (i=0; i<a->mbs; i++) {
1681     for (j=0; j<bs; j++) {
1682       for (k=a->i[i]; k<a->i[i+1]; k++) {
1683         for (l=0; l<bs; l++) {
1684           aa[count++] = a->a[bs2*k + l*bs + j];
1685         }
1686       }
1687     }
1688   }
1689   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1690   ierr = PetscFree(aa);CHKERRQ(ierr);
1691 
1692   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1693   if (file) {
1694     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1701 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1702 {
1703   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1704   PetscErrorCode    ierr;
1705   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1706   PetscViewerFormat format;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1710   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1711     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1712   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1713     Mat aij;
1714     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1715     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1716     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1717   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1718      PetscFunctionReturn(0);
1719   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1720     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1721     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1722     for (i=0; i<a->mbs; i++) {
1723       for (j=0; j<bs; j++) {
1724         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1725         for (k=a->i[i]; k<a->i[i+1]; k++) {
1726           for (l=0; l<bs; l++) {
1727 #if defined(PETSC_USE_COMPLEX)
1728             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1729               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1730                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1731             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1733                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             }
1737 #else
1738             if (a->a[bs2*k + l*bs + j] != 0.0) {
1739               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1740             }
1741 #endif
1742           }
1743         }
1744         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1745       }
1746     }
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1748   } else {
1749     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1750     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1751     for (i=0; i<a->mbs; i++) {
1752       for (j=0; j<bs; j++) {
1753         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1754         for (k=a->i[i]; k<a->i[i+1]; k++) {
1755           for (l=0; l<bs; l++) {
1756 #if defined(PETSC_USE_COMPLEX)
1757             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1758               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1759                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1760             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1761               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1762                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             } else {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1765             }
1766 #else
1767             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1768 #endif
1769           }
1770         }
1771         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1772       }
1773     }
1774     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1775   }
1776   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1782 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1783 {
1784   Mat            A = (Mat) Aa;
1785   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1786   PetscErrorCode ierr;
1787   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1788   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1789   MatScalar      *aa;
1790   PetscViewer    viewer;
1791   PetscViewerFormat format;
1792 
1793   PetscFunctionBegin;
1794   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1795   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1796 
1797   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1798 
1799   /* loop over matrix elements drawing boxes */
1800 
1801   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1802     color = PETSC_DRAW_BLUE;
1803     for (i=0,row=0; i<mbs; i++,row+=bs) {
1804       for (j=a->i[i]; j<a->i[i+1]; j++) {
1805         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807         aa = a->a + j*bs2;
1808         for (k=0; k<bs; k++) {
1809           for (l=0; l<bs; l++) {
1810             if (PetscRealPart(*aa++) >=  0.) continue;
1811             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1812           }
1813         }
1814       }
1815     }
1816     color = PETSC_DRAW_CYAN;
1817     for (i=0,row=0; i<mbs; i++,row+=bs) {
1818       for (j=a->i[i]; j<a->i[i+1]; j++) {
1819         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821         aa = a->a + j*bs2;
1822         for (k=0; k<bs; k++) {
1823           for (l=0; l<bs; l++) {
1824             if (PetscRealPart(*aa++) != 0.) continue;
1825             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1826           }
1827         }
1828       }
1829     }
1830     color = PETSC_DRAW_RED;
1831     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835         aa = a->a + j*bs2;
1836         for (k=0; k<bs; k++) {
1837           for (l=0; l<bs; l++) {
1838             if (PetscRealPart(*aa++) <= 0.) continue;
1839             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840           }
1841         }
1842       }
1843     }
1844   } else {
1845     /* use contour shading to indicate magnitude of values */
1846     /* first determine max of all nonzero values */
1847     PetscDraw   popup;
1848     PetscReal scale,maxv = 0.0;
1849 
1850     for (i=0; i<a->nz*a->bs2; i++) {
1851       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1852     }
1853     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1854     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1855     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1856     for (i=0,row=0; i<mbs; i++,row+=bs) {
1857       for (j=a->i[i]; j<a->i[i+1]; j++) {
1858         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1859         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1860         aa = a->a + j*bs2;
1861         for (k=0; k<bs; k++) {
1862           for (l=0; l<bs; l++) {
1863             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1864             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1865           }
1866         }
1867       }
1868     }
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1875 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1876 {
1877   PetscErrorCode ierr;
1878   PetscReal      xl,yl,xr,yr,w,h;
1879   PetscDraw      draw;
1880   PetscBool      isnull;
1881 
1882   PetscFunctionBegin;
1883 
1884   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1885   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1886 
1887   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1888   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1889   xr += w;    yr += h;  xl = -w;     yl = -h;
1890   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1891   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1892   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatView_SeqBAIJ"
1898 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1899 {
1900   PetscErrorCode ierr;
1901   PetscBool      iascii,isbinary,isdraw;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1905   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1906   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1907   if (iascii){
1908     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1909   } else if (isbinary) {
1910     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1911   } else if (isdraw) {
1912     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1913   } else {
1914     Mat B;
1915     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1916     ierr = MatView(B,viewer);CHKERRQ(ierr);
1917     ierr = MatDestroy(&B);CHKERRQ(ierr);
1918   }
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1925 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1926 {
1927   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1929   PetscInt    *ai = a->i,*ailen = a->ilen;
1930   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1931   MatScalar   *ap,*aa = a->a;
1932 
1933   PetscFunctionBegin;
1934   for (k=0; k<m; k++) { /* loop over rows */
1935     row  = im[k]; brow = row/bs;
1936     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1937     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1938     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1939     nrow = ailen[brow];
1940     for (l=0; l<n; l++) { /* loop over columns */
1941       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1942       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1943       col  = in[l] ;
1944       bcol = col/bs;
1945       cidx = col%bs;
1946       ridx = row%bs;
1947       high = nrow;
1948       low  = 0; /* assume unsorted */
1949       while (high-low > 5) {
1950         t = (low+high)/2;
1951         if (rp[t] > bcol) high = t;
1952         else             low  = t;
1953       }
1954       for (i=low; i<high; i++) {
1955         if (rp[i] > bcol) break;
1956         if (rp[i] == bcol) {
1957           *v++ = ap[bs2*i+bs*cidx+ridx];
1958           goto finished;
1959         }
1960       }
1961       *v++ = 0.0;
1962       finished:;
1963     }
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1970 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1971 {
1972   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1973   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1974   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1975   PetscErrorCode    ierr;
1976   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1977   PetscBool         roworiented=a->roworiented;
1978   const PetscScalar *value = v;
1979   MatScalar         *ap,*aa = a->a,*bap;
1980 
1981   PetscFunctionBegin;
1982   if (roworiented) {
1983     stepval = (n-1)*bs;
1984   } else {
1985     stepval = (m-1)*bs;
1986   }
1987   for (k=0; k<m; k++) { /* loop over added rows */
1988     row  = im[k];
1989     if (row < 0) continue;
1990 #if defined(PETSC_USE_DEBUG)
1991     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1992 #endif
1993     rp   = aj + ai[row];
1994     ap   = aa + bs2*ai[row];
1995     rmax = imax[row];
1996     nrow = ailen[row];
1997     low  = 0;
1998     high = nrow;
1999     for (l=0; l<n; l++) { /* loop over added columns */
2000       if (in[l] < 0) continue;
2001 #if defined(PETSC_USE_DEBUG)
2002       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2003 #endif
2004       col = in[l];
2005       if (roworiented) {
2006         value = v + (k*(stepval+bs) + l)*bs;
2007       } else {
2008         value = v + (l*(stepval+bs) + k)*bs;
2009       }
2010       if (col <= lastcol) low = 0; else high = nrow;
2011       lastcol = col;
2012       while (high-low > 7) {
2013         t = (low+high)/2;
2014         if (rp[t] > col) high = t;
2015         else             low  = t;
2016       }
2017       for (i=low; i<high; i++) {
2018         if (rp[i] > col) break;
2019         if (rp[i] == col) {
2020           bap  = ap +  bs2*i;
2021           if (roworiented) {
2022             if (is == ADD_VALUES) {
2023               for (ii=0; ii<bs; ii++,value+=stepval) {
2024                 for (jj=ii; jj<bs2; jj+=bs) {
2025                   bap[jj] += *value++;
2026                 }
2027               }
2028             } else {
2029               for (ii=0; ii<bs; ii++,value+=stepval) {
2030                 for (jj=ii; jj<bs2; jj+=bs) {
2031                   bap[jj] = *value++;
2032                 }
2033               }
2034             }
2035           } else {
2036             if (is == ADD_VALUES) {
2037               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038                 for (jj=0; jj<bs; jj++) {
2039                   bap[jj] += value[jj];
2040                 }
2041                 bap += bs;
2042               }
2043             } else {
2044               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2045                 for (jj=0; jj<bs; jj++) {
2046                   bap[jj]  = value[jj];
2047                 }
2048                 bap += bs;
2049               }
2050             }
2051           }
2052           goto noinsert2;
2053         }
2054       }
2055       if (nonew == 1) goto noinsert2;
2056       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2057       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2058       N = nrow++ - 1; high++;
2059       /* shift up all the later entries in this row */
2060       for (ii=N; ii>=i; ii--) {
2061         rp[ii+1] = rp[ii];
2062         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2063       }
2064       if (N >= i) {
2065         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2066       }
2067       rp[i] = col;
2068       bap   = ap +  bs2*i;
2069       if (roworiented) {
2070         for (ii=0; ii<bs; ii++,value+=stepval) {
2071           for (jj=ii; jj<bs2; jj+=bs) {
2072             bap[jj] = *value++;
2073           }
2074         }
2075       } else {
2076         for (ii=0; ii<bs; ii++,value+=stepval) {
2077           for (jj=0; jj<bs; jj++) {
2078             *bap++  = *value++;
2079           }
2080         }
2081       }
2082       noinsert2:;
2083       low = i;
2084     }
2085     ailen[row] = nrow;
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2092 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2093 {
2094   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2095   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2096   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2097   PetscErrorCode ierr;
2098   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2099   MatScalar      *aa = a->a,*ap;
2100   PetscReal      ratio=0.6;
2101 
2102   PetscFunctionBegin;
2103   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2104 
2105   if (m) rmax = ailen[0];
2106   for (i=1; i<mbs; i++) {
2107     /* move each row back by the amount of empty slots (fshift) before it*/
2108     fshift += imax[i-1] - ailen[i-1];
2109     rmax   = PetscMax(rmax,ailen[i]);
2110     if (fshift) {
2111       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2112       N = ailen[i];
2113       for (j=0; j<N; j++) {
2114         ip[j-fshift] = ip[j];
2115         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2116       }
2117     }
2118     ai[i] = ai[i-1] + ailen[i-1];
2119   }
2120   if (mbs) {
2121     fshift += imax[mbs-1] - ailen[mbs-1];
2122     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2123   }
2124   /* reset ilen and imax for each row */
2125   for (i=0; i<mbs; i++) {
2126     ailen[i] = imax[i] = ai[i+1] - ai[i];
2127   }
2128   a->nz = ai[mbs];
2129 
2130   /* diagonals may have moved, so kill the diagonal pointers */
2131   a->idiagvalid = PETSC_FALSE;
2132   if (fshift && a->diag) {
2133     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2134     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2135     a->diag = 0;
2136   }
2137   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2138   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2139   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2140   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2141   A->info.mallocs     += a->reallocs;
2142   a->reallocs          = 0;
2143   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2144 
2145   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2146   A->same_nonzero = PETSC_TRUE;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    This function returns an array of flags which indicate the locations of contiguous
2152    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2153    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2154    Assume: sizes should be long enough to hold all the values.
2155 */
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2158 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2159 {
2160   PetscInt   i,j,k,row;
2161   PetscBool  flg;
2162 
2163   PetscFunctionBegin;
2164   for (i=0,j=0; i<n; j++) {
2165     row = idx[i];
2166     if (row%bs!=0) { /* Not the begining of a block */
2167       sizes[j] = 1;
2168       i++;
2169     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2170       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2171       i++;
2172     } else { /* Begining of the block, so check if the complete block exists */
2173       flg = PETSC_TRUE;
2174       for (k=1; k<bs; k++) {
2175         if (row+k != idx[i+k]) { /* break in the block */
2176           flg = PETSC_FALSE;
2177           break;
2178         }
2179       }
2180       if (flg) { /* No break in the bs */
2181         sizes[j] = bs;
2182         i+= bs;
2183       } else {
2184         sizes[j] = 1;
2185         i++;
2186       }
2187     }
2188   }
2189   *bs_max = j;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2195 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2196 {
2197   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2198   PetscErrorCode    ierr;
2199   PetscInt          i,j,k,count,*rows;
2200   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2201   PetscScalar       zero = 0.0;
2202   MatScalar         *aa;
2203   const PetscScalar *xx;
2204   PetscScalar       *bb;
2205 
2206   PetscFunctionBegin;
2207   /* fix right hand side if needed */
2208   if (x && b) {
2209     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2210     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2211     for (i=0; i<is_n; i++) {
2212       bb[is_idx[i]] = diag*xx[is_idx[i]];
2213     }
2214     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2215     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2216   }
2217 
2218   /* Make a copy of the IS and  sort it */
2219   /* allocate memory for rows,sizes */
2220   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2221 
2222   /* copy IS values to rows, and sort them */
2223   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2224   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2225 
2226   if (baij->keepnonzeropattern) {
2227     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2228     bs_max = is_n;
2229     A->same_nonzero = PETSC_TRUE;
2230   } else {
2231     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2232     A->same_nonzero = PETSC_FALSE;
2233   }
2234 
2235   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2236     row   = rows[j];
2237     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2238     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2239     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2240     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2241       if (diag != (PetscScalar)0.0) {
2242         if (baij->ilen[row/bs] > 0) {
2243           baij->ilen[row/bs]       = 1;
2244           baij->j[baij->i[row/bs]] = row/bs;
2245           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2246         }
2247         /* Now insert all the diagonal values for this bs */
2248         for (k=0; k<bs; k++) {
2249           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2250         }
2251       } else { /* (diag == 0.0) */
2252         baij->ilen[row/bs] = 0;
2253       } /* end (diag == 0.0) */
2254     } else { /* (sizes[i] != bs) */
2255 #if defined (PETSC_USE_DEBUG)
2256       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2257 #endif
2258       for (k=0; k<count; k++) {
2259         aa[0] =  zero;
2260         aa    += bs;
2261       }
2262       if (diag != (PetscScalar)0.0) {
2263         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2264       }
2265     }
2266   }
2267 
2268   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2269   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2275 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2276 {
2277   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2278   PetscErrorCode    ierr;
2279   PetscInt          i,j,k,count;
2280   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2281   PetscScalar       zero = 0.0;
2282   MatScalar         *aa;
2283   const PetscScalar *xx;
2284   PetscScalar       *bb;
2285   PetscBool         *zeroed,vecs = PETSC_FALSE;
2286 
2287   PetscFunctionBegin;
2288   /* fix right hand side if needed */
2289   if (x && b) {
2290     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2291     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2292     vecs = PETSC_TRUE;
2293   }
2294   A->same_nonzero = PETSC_TRUE;
2295 
2296   /* zero the columns */
2297   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2298   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2299   for (i=0; i<is_n; i++) {
2300     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2301     zeroed[is_idx[i]] = PETSC_TRUE;
2302   }
2303   for (i=0; i<A->rmap->N; i++) {
2304     if (!zeroed[i]) {
2305       row = i/bs;
2306       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2307         for (k=0; k<bs; k++) {
2308           col = bs*baij->j[j] + k;
2309 	  if (zeroed[col]) {
2310 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2311             if (vecs) bb[i] -= aa[0]*xx[col];
2312             aa[0] = 0.0;
2313           }
2314         }
2315       }
2316     } else if (vecs) bb[i] = diag*xx[i];
2317   }
2318   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2319   if (vecs) {
2320     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2321     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2322   }
2323 
2324   /* zero the rows */
2325   for (i=0; i<is_n; i++) {
2326     row   = is_idx[i];
2327     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2328     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2329     for (k=0; k<count; k++) {
2330       aa[0] =  zero;
2331       aa    += bs;
2332     }
2333     if (diag != (PetscScalar)0.0) {
2334       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2335     }
2336   }
2337   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2343 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2344 {
2345   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2346   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2347   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2348   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2349   PetscErrorCode ierr;
2350   PetscInt       ridx,cidx,bs2=a->bs2;
2351   PetscBool      roworiented=a->roworiented;
2352   MatScalar      *ap,value,*aa=a->a,*bap;
2353 
2354   PetscFunctionBegin;
2355   if (v) PetscValidScalarPointer(v,6);
2356   for (k=0; k<m; k++) { /* loop over added rows */
2357     row  = im[k];
2358     brow = row/bs;
2359     if (row < 0) continue;
2360 #if defined(PETSC_USE_DEBUG)
2361     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2362 #endif
2363     rp   = aj + ai[brow];
2364     ap   = aa + bs2*ai[brow];
2365     rmax = imax[brow];
2366     nrow = ailen[brow];
2367     low  = 0;
2368     high = nrow;
2369     for (l=0; l<n; l++) { /* loop over added columns */
2370       if (in[l] < 0) continue;
2371 #if defined(PETSC_USE_DEBUG)
2372       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2373 #endif
2374       col = in[l]; bcol = col/bs;
2375       ridx = row % bs; cidx = col % bs;
2376       if (roworiented) {
2377         value = v[l + k*n];
2378       } else {
2379         value = v[k + l*m];
2380       }
2381       if (col <= lastcol) low = 0; else high = nrow;
2382       lastcol = col;
2383       while (high-low > 7) {
2384         t = (low+high)/2;
2385         if (rp[t] > bcol) high = t;
2386         else              low  = t;
2387       }
2388       for (i=low; i<high; i++) {
2389         if (rp[i] > bcol) break;
2390         if (rp[i] == bcol) {
2391           bap  = ap +  bs2*i + bs*cidx + ridx;
2392           if (is == ADD_VALUES) *bap += value;
2393           else                  *bap  = value;
2394           goto noinsert1;
2395         }
2396       }
2397       if (nonew == 1) goto noinsert1;
2398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2399       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2400       N = nrow++ - 1; high++;
2401       /* shift up all the later entries in this row */
2402       for (ii=N; ii>=i; ii--) {
2403         rp[ii+1] = rp[ii];
2404         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2405       }
2406       if (N>=i) {
2407         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2408       }
2409       rp[i]                      = bcol;
2410       ap[bs2*i + bs*cidx + ridx] = value;
2411       a->nz++;
2412       noinsert1:;
2413       low = i;
2414     }
2415     ailen[brow] = nrow;
2416   }
2417   A->same_nonzero = PETSC_FALSE;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2423 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2426   Mat            outA;
2427   PetscErrorCode ierr;
2428   PetscBool      row_identity,col_identity;
2429 
2430   PetscFunctionBegin;
2431   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2432   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2433   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2434   if (!row_identity || !col_identity) {
2435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2436   }
2437 
2438   outA            = inA;
2439   inA->factortype = MAT_FACTOR_LU;
2440 
2441   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2442 
2443   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2444   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2445   a->row = row;
2446   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2447   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2448   a->col = col;
2449 
2450   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2451   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2452    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2453   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2454 
2455   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2456   if (!a->solve_work) {
2457     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2458     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2459   }
2460   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2461 
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 EXTERN_C_BEGIN
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2468 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2469 {
2470   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2471   PetscInt    i,nz,mbs;
2472 
2473   PetscFunctionBegin;
2474   nz  = baij->maxnz;
2475   mbs = baij->mbs;
2476   for (i=0; i<nz; i++) {
2477     baij->j[i] = indices[i];
2478   }
2479   baij->nz = nz;
2480   for (i=0; i<mbs; i++) {
2481     baij->ilen[i] = baij->imax[i];
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 EXTERN_C_END
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2489 /*@
2490     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2491        in the matrix.
2492 
2493   Input Parameters:
2494 +  mat - the SeqBAIJ matrix
2495 -  indices - the column indices
2496 
2497   Level: advanced
2498 
2499   Notes:
2500     This can be called if you have precomputed the nonzero structure of the
2501   matrix and want to provide it to the matrix object to improve the performance
2502   of the MatSetValues() operation.
2503 
2504     You MUST have set the correct numbers of nonzeros per row in the call to
2505   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2506 
2507     MUST be called before any calls to MatSetValues();
2508 
2509 @*/
2510 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2511 {
2512   PetscErrorCode ierr;
2513 
2514   PetscFunctionBegin;
2515   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2516   PetscValidPointer(indices,2);
2517   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2523 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2524 {
2525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2526   PetscErrorCode ierr;
2527   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2528   PetscReal      atmp;
2529   PetscScalar    *x,zero = 0.0;
2530   MatScalar      *aa;
2531   PetscInt       ncols,brow,krow,kcol;
2532 
2533   PetscFunctionBegin;
2534   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2535   bs   = A->rmap->bs;
2536   aa   = a->a;
2537   ai   = a->i;
2538   aj   = a->j;
2539   mbs  = a->mbs;
2540 
2541   ierr = VecSet(v,zero);CHKERRQ(ierr);
2542   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2543   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2544   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2545   for (i=0; i<mbs; i++) {
2546     ncols = ai[1] - ai[0]; ai++;
2547     brow  = bs*i;
2548     for (j=0; j<ncols; j++){
2549       for (kcol=0; kcol<bs; kcol++){
2550         for (krow=0; krow<bs; krow++){
2551           atmp = PetscAbsScalar(*aa);aa++;
2552           row = brow + krow;    /* row index */
2553           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2554           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2555         }
2556       }
2557       aj++;
2558     }
2559   }
2560   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatCopy_SeqBAIJ"
2566 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2567 {
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   /* If the two matrices have the same copy implementation, use fast copy. */
2572   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2573     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2574     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2575     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2576 
2577     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2578     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2579     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2580   } else {
2581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2588 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2589 {
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2599 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2600 {
2601   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2602   PetscFunctionBegin;
2603   *array = a->a;
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2609 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2610 {
2611   PetscFunctionBegin;
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 #undef __FUNCT__
2616 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2617 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2618 {
2619   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2620   PetscErrorCode ierr;
2621   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2622   PetscBLASInt   one=1;
2623 
2624   PetscFunctionBegin;
2625   if (str == SAME_NONZERO_PATTERN) {
2626     PetscScalar alpha = a;
2627     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2628     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2629   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2630     if (y->xtoy && y->XtoY != X) {
2631       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2632       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2633     }
2634     if (!y->xtoy) { /* get xtoy */
2635       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2636       y->XtoY = X;
2637       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2638     }
2639     for (i=0; i<x->nz; i++) {
2640       j = 0;
2641       while (j < bs2){
2642         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2643         j++;
2644       }
2645     }
2646     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2647   } else {
2648     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2649   }
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2655 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2656 {
2657   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2658   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2659   MatScalar      *aa = a->a;
2660 
2661   PetscFunctionBegin;
2662   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2663   PetscFunctionReturn(0);
2664 }
2665 
2666 #undef __FUNCT__
2667 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2668 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2669 {
2670   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2671   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2672   MatScalar      *aa = a->a;
2673 
2674   PetscFunctionBegin;
2675   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2676   PetscFunctionReturn(0);
2677 }
2678 
2679 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2683 /*
2684     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2685 */
2686 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2687 {
2688   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2689   PetscErrorCode ierr;
2690   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2691   PetscInt       nz = a->i[m],row,*jj,mr,col;
2692 
2693   PetscFunctionBegin;
2694   *nn = n;
2695   if (!ia) PetscFunctionReturn(0);
2696   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2697   else {
2698     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2699     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2700     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2701     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2702     jj = a->j;
2703     for (i=0; i<nz; i++) {
2704       collengths[jj[i]]++;
2705     }
2706     cia[0] = oshift;
2707     for (i=0; i<n; i++) {
2708       cia[i+1] = cia[i] + collengths[i];
2709     }
2710     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2711     jj   = a->j;
2712     for (row=0; row<m; row++) {
2713       mr = a->i[row+1] - a->i[row];
2714       for (i=0; i<mr; i++) {
2715         col = *jj++;
2716         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2717       }
2718     }
2719     ierr = PetscFree(collengths);CHKERRQ(ierr);
2720     *ia = cia; *ja = cja;
2721   }
2722   PetscFunctionReturn(0);
2723 }
2724 
2725 #undef __FUNCT__
2726 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2727 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2728 {
2729   PetscErrorCode ierr;
2730 
2731   PetscFunctionBegin;
2732   if (!ia) PetscFunctionReturn(0);
2733   ierr = PetscFree(*ia);CHKERRQ(ierr);
2734   ierr = PetscFree(*ja);CHKERRQ(ierr);
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2740 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2741 {
2742   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2743   PetscErrorCode ierr;
2744   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2745   PetscScalar    dx,*y,*xx,*w3_array;
2746   PetscScalar    *vscale_array;
2747   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2748   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2749   void           *fctx = coloring->fctx;
2750   PetscBool      flg = PETSC_FALSE;
2751   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2752   Vec            x1_tmp;
2753 
2754   PetscFunctionBegin;
2755   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2756   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2757   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2758   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2759 
2760   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2761   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2762   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2763   if (flg) {
2764     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2765   } else {
2766     PetscBool  assembled;
2767     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2768     if (assembled) {
2769       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2770     }
2771   }
2772 
2773   x1_tmp = x1;
2774   if (!coloring->vscale){
2775     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2776   }
2777 
2778   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2779     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2780   }
2781   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2782 
2783   /* Set w1 = F(x1) */
2784   if (!coloring->fset) {
2785     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2786     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2787     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2788   } else {
2789     coloring->fset = PETSC_FALSE;
2790   }
2791 
2792   if (!coloring->w3) {
2793     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2794     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2795   }
2796   w3 = coloring->w3;
2797 
2798     CHKMEMQ;
2799     /* Compute all the local scale factors, including ghost points */
2800   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2801   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2802   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2803   if (ctype == IS_COLORING_GHOSTED){
2804     col_start = 0; col_end = N;
2805   } else if (ctype == IS_COLORING_GLOBAL){
2806     xx = xx - start;
2807     vscale_array = vscale_array - start;
2808     col_start = start; col_end = N + start;
2809   }    CHKMEMQ;
2810   for (col=col_start; col<col_end; col++){
2811     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2812     if (coloring->htype[0] == 'w') {
2813       dx = 1.0 + unorm;
2814     } else {
2815       dx  = xx[col];
2816     }
2817     if (dx == (PetscScalar)0.0) dx = 1.0;
2818 #if !defined(PETSC_USE_COMPLEX)
2819     if (dx < umin && dx >= 0.0)      dx = umin;
2820     else if (dx < 0.0 && dx > -umin) dx = -umin;
2821 #else
2822     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2823     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2824 #endif
2825     dx               *= epsilon;
2826     vscale_array[col] = (PetscScalar)1.0/dx;
2827   }     CHKMEMQ;
2828   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2829   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2830   if (ctype == IS_COLORING_GLOBAL){
2831     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2832     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2833   }
2834   CHKMEMQ;
2835   if (coloring->vscaleforrow) {
2836     vscaleforrow = coloring->vscaleforrow;
2837   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2838 
2839   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2840   /*
2841     Loop over each color
2842   */
2843   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2844   for (k=0; k<coloring->ncolors; k++) {
2845     coloring->currentcolor = k;
2846     for (i=0; i<bs; i++) {
2847       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2848       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2849       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2850       /*
2851 	Loop over each column associated with color
2852 	adding the perturbation to the vector w3.
2853       */
2854       for (l=0; l<coloring->ncolumns[k]; l++) {
2855 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2856 	if (coloring->htype[0] == 'w') {
2857 	  dx = 1.0 + unorm;
2858 	} else {
2859 	  dx  = xx[col];
2860 	}
2861 	if (dx == (PetscScalar)0.0) dx = 1.0;
2862 #if !defined(PETSC_USE_COMPLEX)
2863 	if (dx < umin && dx >= 0.0)      dx = umin;
2864 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2865 #else
2866 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2867 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2868 #endif
2869 	dx            *= epsilon;
2870 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2871 	w3_array[col] += dx;
2872       }
2873       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2874       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2875 
2876       /*
2877 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2878 	w2 = F(x1 + dx) - F(x1)
2879       */
2880       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2881       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2882       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2883       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2884 
2885       /*
2886 	Loop over rows of vector, putting results into Jacobian matrix
2887       */
2888       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2889       for (l=0; l<coloring->nrows[k]; l++) {
2890 	row    = bs*coloring->rows[k][l];             /* local row index */
2891 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2892         for (j=0; j<bs; j++) {
2893   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2894           srows[j]  = row + start + j;
2895         }
2896 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2897       }
2898       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2899     }
2900   } /* endof for each color */
2901   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2902   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2903   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2904   ierr = PetscFree(srows);CHKERRQ(ierr);
2905 
2906   coloring->currentcolor = -1;
2907   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2908   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2909   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2910   PetscFunctionReturn(0);
2911 }
2912 
2913 /* -------------------------------------------------------------------*/
2914 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2915        MatGetRow_SeqBAIJ,
2916        MatRestoreRow_SeqBAIJ,
2917        MatMult_SeqBAIJ_N,
2918 /* 4*/ MatMultAdd_SeqBAIJ_N,
2919        MatMultTranspose_SeqBAIJ,
2920        MatMultTransposeAdd_SeqBAIJ,
2921        0,
2922        0,
2923        0,
2924 /*10*/ 0,
2925        MatLUFactor_SeqBAIJ,
2926        0,
2927        0,
2928        MatTranspose_SeqBAIJ,
2929 /*15*/ MatGetInfo_SeqBAIJ,
2930        MatEqual_SeqBAIJ,
2931        MatGetDiagonal_SeqBAIJ,
2932        MatDiagonalScale_SeqBAIJ,
2933        MatNorm_SeqBAIJ,
2934 /*20*/ 0,
2935        MatAssemblyEnd_SeqBAIJ,
2936        MatSetOption_SeqBAIJ,
2937        MatZeroEntries_SeqBAIJ,
2938 /*24*/ MatZeroRows_SeqBAIJ,
2939        0,
2940        0,
2941        0,
2942        0,
2943 /*29*/ MatSetUp_SeqBAIJ,
2944        0,
2945        0,
2946        0,
2947        0,
2948 /*34*/ MatDuplicate_SeqBAIJ,
2949        0,
2950        0,
2951        MatILUFactor_SeqBAIJ,
2952        0,
2953 /*39*/ MatAXPY_SeqBAIJ,
2954        MatGetSubMatrices_SeqBAIJ,
2955        MatIncreaseOverlap_SeqBAIJ,
2956        MatGetValues_SeqBAIJ,
2957        MatCopy_SeqBAIJ,
2958 /*44*/ 0,
2959        MatScale_SeqBAIJ,
2960        0,
2961        0,
2962        MatZeroRowsColumns_SeqBAIJ,
2963 /*49*/ 0,
2964        MatGetRowIJ_SeqBAIJ,
2965        MatRestoreRowIJ_SeqBAIJ,
2966        MatGetColumnIJ_SeqBAIJ,
2967        MatRestoreColumnIJ_SeqBAIJ,
2968 /*54*/ MatFDColoringCreate_SeqAIJ,
2969        0,
2970        0,
2971        0,
2972        MatSetValuesBlocked_SeqBAIJ,
2973 /*59*/ MatGetSubMatrix_SeqBAIJ,
2974        MatDestroy_SeqBAIJ,
2975        MatView_SeqBAIJ,
2976        0,
2977        0,
2978 /*64*/ 0,
2979        0,
2980        0,
2981        0,
2982        0,
2983 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2984        0,
2985        MatConvert_Basic,
2986        0,
2987        0,
2988 /*74*/ 0,
2989        MatFDColoringApply_BAIJ,
2990        0,
2991        0,
2992        0,
2993 /*79*/ 0,
2994        0,
2995        0,
2996        0,
2997        MatLoad_SeqBAIJ,
2998 /*84*/ 0,
2999        0,
3000        0,
3001        0,
3002        0,
3003 /*89*/ 0,
3004        0,
3005        0,
3006        0,
3007        0,
3008 /*94*/ 0,
3009        0,
3010        0,
3011        0,
3012        0,
3013 /*99*/0,
3014        0,
3015        0,
3016        0,
3017        0,
3018 /*104*/0,
3019        MatRealPart_SeqBAIJ,
3020        MatImaginaryPart_SeqBAIJ,
3021        0,
3022        0,
3023 /*109*/0,
3024        0,
3025        0,
3026        0,
3027        MatMissingDiagonal_SeqBAIJ,
3028 /*114*/0,
3029        0,
3030        0,
3031        0,
3032        0,
3033 /*119*/0,
3034        0,
3035        MatMultHermitianTranspose_SeqBAIJ,
3036        MatMultHermitianTransposeAdd_SeqBAIJ,
3037        0,
3038 /*124*/0,
3039        0,
3040        MatInvertBlockDiagonal_SeqBAIJ
3041 };
3042 
3043 EXTERN_C_BEGIN
3044 #undef __FUNCT__
3045 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3046 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3047 {
3048   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3049   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3050   PetscErrorCode ierr;
3051 
3052   PetscFunctionBegin;
3053   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3054 
3055   /* allocate space for values if not already there */
3056   if (!aij->saved_values) {
3057     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3058     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3059   }
3060 
3061   /* copy values over */
3062   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3063   PetscFunctionReturn(0);
3064 }
3065 EXTERN_C_END
3066 
3067 EXTERN_C_BEGIN
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3070 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3071 {
3072   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3073   PetscErrorCode ierr;
3074   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3075 
3076   PetscFunctionBegin;
3077   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3078   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3079 
3080   /* copy values over */
3081   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3082   PetscFunctionReturn(0);
3083 }
3084 EXTERN_C_END
3085 
3086 EXTERN_C_BEGIN
3087 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3088 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3089 EXTERN_C_END
3090 
3091 EXTERN_C_BEGIN
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3094 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3095 {
3096   Mat_SeqBAIJ    *b;
3097   PetscErrorCode ierr;
3098   PetscInt       i,mbs,nbs,bs2;
3099   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3100 
3101   PetscFunctionBegin;
3102   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3103   if (nz == MAT_SKIP_ALLOCATION) {
3104     skipallocation = PETSC_TRUE;
3105     nz             = 0;
3106   }
3107 
3108   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3109   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3110   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3111   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3112   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3113 
3114   B->preallocated = PETSC_TRUE;
3115 
3116   mbs  = B->rmap->n/bs;
3117   nbs  = B->cmap->n/bs;
3118   bs2  = bs*bs;
3119 
3120   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3121 
3122   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3123   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3124   if (nnz) {
3125     for (i=0; i<mbs; i++) {
3126       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3127       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3128     }
3129   }
3130 
3131   b       = (Mat_SeqBAIJ*)B->data;
3132   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3133     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3134   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3135 
3136   if (!flg) {
3137     switch (bs) {
3138     case 1:
3139       B->ops->mult            = MatMult_SeqBAIJ_1;
3140       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3141       B->ops->sor             = MatSOR_SeqBAIJ_1;
3142       break;
3143     case 2:
3144       B->ops->mult            = MatMult_SeqBAIJ_2;
3145       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3146       B->ops->sor             = MatSOR_SeqBAIJ_2;
3147       break;
3148     case 3:
3149       B->ops->mult            = MatMult_SeqBAIJ_3;
3150       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3151       B->ops->sor             = MatSOR_SeqBAIJ_3;
3152       break;
3153     case 4:
3154       B->ops->mult            = MatMult_SeqBAIJ_4;
3155       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3156       B->ops->sor             = MatSOR_SeqBAIJ_4;
3157       break;
3158     case 5:
3159       B->ops->mult            = MatMult_SeqBAIJ_5;
3160       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3161       B->ops->sor             = MatSOR_SeqBAIJ_5;
3162       break;
3163     case 6:
3164       B->ops->mult            = MatMult_SeqBAIJ_6;
3165       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3166       B->ops->sor             = MatSOR_SeqBAIJ_6;
3167       break;
3168     case 7:
3169       B->ops->mult            = MatMult_SeqBAIJ_7;
3170       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3171       B->ops->sor             = MatSOR_SeqBAIJ_7;
3172       break;
3173     case 15:
3174       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3175       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3176       B->ops->sor             = MatSOR_SeqBAIJ_N;
3177       break;
3178     default:
3179       B->ops->mult            = MatMult_SeqBAIJ_N;
3180       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3181       B->ops->sor             = MatSOR_SeqBAIJ_N;
3182       break;
3183     }
3184   }
3185   b->mbs       = mbs;
3186   b->nbs       = nbs;
3187   if (!skipallocation) {
3188     if (!b->imax) {
3189       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3190       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3191       b->free_imax_ilen = PETSC_TRUE;
3192     }
3193     /* b->ilen will count nonzeros in each block row so far. */
3194     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3195     if (!nnz) {
3196       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3197       else if (nz < 0) nz = 1;
3198       for (i=0; i<mbs; i++) b->imax[i] = nz;
3199       nz = nz*mbs;
3200     } else {
3201       nz = 0;
3202       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3203     }
3204 
3205     /* allocate the matrix space */
3206     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3207     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3208     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3209     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3210     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3211     b->singlemalloc = PETSC_TRUE;
3212     b->i[0] = 0;
3213     for (i=1; i<mbs+1; i++) {
3214       b->i[i] = b->i[i-1] + b->imax[i-1];
3215     }
3216     b->free_a     = PETSC_TRUE;
3217     b->free_ij    = PETSC_TRUE;
3218   } else {
3219     b->free_a     = PETSC_FALSE;
3220     b->free_ij    = PETSC_FALSE;
3221   }
3222 
3223   b->bs2              = bs2;
3224   b->mbs              = mbs;
3225   b->nz               = 0;
3226   b->maxnz            = nz;
3227   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3228   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3229   PetscFunctionReturn(0);
3230 }
3231 EXTERN_C_END
3232 
3233 EXTERN_C_BEGIN
3234 #undef __FUNCT__
3235 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3236 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3237 {
3238   PetscInt       i,m,nz,nz_max=0,*nnz;
3239   PetscScalar    *values=0;
3240   PetscErrorCode ierr;
3241 
3242   PetscFunctionBegin;
3243   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3244   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3245   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3246   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3247   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3248   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3249   m = B->rmap->n/bs;
3250 
3251   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3252   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3253   for(i=0; i<m; i++) {
3254     nz = ii[i+1]- ii[i];
3255     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3256     nz_max = PetscMax(nz_max, nz);
3257     nnz[i] = nz;
3258   }
3259   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3260   ierr = PetscFree(nnz);CHKERRQ(ierr);
3261 
3262   values = (PetscScalar*)V;
3263   if (!values) {
3264     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3265     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3266   }
3267   for (i=0; i<m; i++) {
3268     PetscInt          ncols  = ii[i+1] - ii[i];
3269     const PetscInt    *icols = jj + ii[i];
3270     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3271     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3272   }
3273   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3274   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3275   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3276   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3277   PetscFunctionReturn(0);
3278 }
3279 EXTERN_C_END
3280 
3281 
3282 EXTERN_C_BEGIN
3283 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3284 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3285 #if defined(PETSC_HAVE_MUMPS)
3286 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3287 #endif
3288 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3289 EXTERN_C_END
3290 
3291 /*MC
3292    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3293    block sparse compressed row format.
3294 
3295    Options Database Keys:
3296 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3297 
3298   Level: beginner
3299 
3300 .seealso: MatCreateSeqBAIJ()
3301 M*/
3302 
3303 EXTERN_C_BEGIN
3304 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3305 EXTERN_C_END
3306 
3307 EXTERN_C_BEGIN
3308 #undef __FUNCT__
3309 #define __FUNCT__ "MatCreate_SeqBAIJ"
3310 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3311 {
3312   PetscErrorCode ierr;
3313   PetscMPIInt    size;
3314   Mat_SeqBAIJ    *b;
3315 
3316   PetscFunctionBegin;
3317   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3318   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3319 
3320   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3321   B->data = (void*)b;
3322   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3323   b->row                   = 0;
3324   b->col                   = 0;
3325   b->icol                  = 0;
3326   b->reallocs              = 0;
3327   b->saved_values          = 0;
3328 
3329   b->roworiented           = PETSC_TRUE;
3330   b->nonew                 = 0;
3331   b->diag                  = 0;
3332   b->solve_work            = 0;
3333   b->mult_work             = 0;
3334   B->spptr                 = 0;
3335   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3336   b->keepnonzeropattern    = PETSC_FALSE;
3337   b->xtoy                  = 0;
3338   b->XtoY                  = 0;
3339   B->same_nonzero          = PETSC_FALSE;
3340 
3341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3342                                      "MatGetFactorAvailable_seqbaij_petsc",
3343                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3345                                      "MatGetFactor_seqbaij_petsc",
3346                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3348                                      "MatGetFactor_seqbaij_bstrm",
3349                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3350 #if defined(PETSC_HAVE_MUMPS)
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3352 #endif
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3354                                      "MatInvertBlockDiagonal_SeqBAIJ",
3355                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3357                                      "MatStoreValues_SeqBAIJ",
3358                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3360                                      "MatRetrieveValues_SeqBAIJ",
3361                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3363                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3364                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3366                                      "MatConvert_SeqBAIJ_SeqAIJ",
3367                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3369                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3370                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3371   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3372                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3373                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3374   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3375                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3376                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3378                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3379                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3381                                      "MatIsTranspose_SeqBAIJ",
3382                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3383   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3384   PetscFunctionReturn(0);
3385 }
3386 EXTERN_C_END
3387 
3388 #undef __FUNCT__
3389 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3390 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3391 {
3392   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3393   PetscErrorCode ierr;
3394   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3395 
3396   PetscFunctionBegin;
3397   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3398 
3399   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3400     c->imax = a->imax;
3401     c->ilen = a->ilen;
3402     c->free_imax_ilen = PETSC_FALSE;
3403   } else {
3404     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3405     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3406     for (i=0; i<mbs; i++) {
3407       c->imax[i] = a->imax[i];
3408       c->ilen[i] = a->ilen[i];
3409     }
3410     c->free_imax_ilen = PETSC_TRUE;
3411   }
3412 
3413   /* allocate the matrix space */
3414   if (mallocmatspace){
3415     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3416       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3417       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3418       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3419       c->i            = a->i;
3420       c->j            = a->j;
3421       c->singlemalloc = PETSC_FALSE;
3422       c->free_a       = PETSC_TRUE;
3423       c->free_ij      = PETSC_FALSE;
3424       c->parent       = A;
3425       C->preallocated = PETSC_TRUE;
3426       C->assembled    = PETSC_TRUE;
3427       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3428       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3429       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3430     } else {
3431       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3432       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3433       c->singlemalloc = PETSC_TRUE;
3434       c->free_a       = PETSC_TRUE;
3435       c->free_ij      = PETSC_TRUE;
3436       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3437       if (mbs > 0) {
3438 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3439 	if (cpvalues == MAT_COPY_VALUES) {
3440 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3441 	} else {
3442 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3443 	}
3444       }
3445       C->preallocated = PETSC_TRUE;
3446       C->assembled    = PETSC_TRUE;
3447     }
3448   }
3449 
3450   c->roworiented = a->roworiented;
3451   c->nonew       = a->nonew;
3452   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3453   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3454   c->bs2         = a->bs2;
3455   c->mbs         = a->mbs;
3456   c->nbs         = a->nbs;
3457 
3458   if (a->diag) {
3459     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3460       c->diag      = a->diag;
3461       c->free_diag = PETSC_FALSE;
3462     } else {
3463       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3464       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3465       for (i=0; i<mbs; i++) {
3466         c->diag[i] = a->diag[i];
3467       }
3468       c->free_diag = PETSC_TRUE;
3469     }
3470   } else c->diag        = 0;
3471   c->nz                 = a->nz;
3472   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3473   c->solve_work         = 0;
3474   c->mult_work          = 0;
3475 
3476   c->compressedrow.use     = a->compressedrow.use;
3477   c->compressedrow.nrows   = a->compressedrow.nrows;
3478   c->compressedrow.check   = a->compressedrow.check;
3479   if (a->compressedrow.use){
3480     i = a->compressedrow.nrows;
3481     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3482     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3483     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3484     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3485   } else {
3486     c->compressedrow.use    = PETSC_FALSE;
3487     c->compressedrow.i      = PETSC_NULL;
3488     c->compressedrow.rindex = PETSC_NULL;
3489   }
3490   C->same_nonzero = A->same_nonzero;
3491   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3492   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3493   PetscFunctionReturn(0);
3494 }
3495 
3496 #undef __FUNCT__
3497 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3498 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3499 {
3500     PetscErrorCode ierr;
3501 
3502   PetscFunctionBegin;
3503   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3504   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3505   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3506   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3507   PetscFunctionReturn(0);
3508 }
3509 
3510 #undef __FUNCT__
3511 #define __FUNCT__ "MatLoad_SeqBAIJ"
3512 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3513 {
3514   Mat_SeqBAIJ    *a;
3515   PetscErrorCode ierr;
3516   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3517   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3518   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3519   PetscInt       *masked,nmask,tmp,bs2,ishift;
3520   PetscMPIInt    size;
3521   int            fd;
3522   PetscScalar    *aa;
3523   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3524 
3525   PetscFunctionBegin;
3526   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3527   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3528   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3529   bs2  = bs*bs;
3530 
3531   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3532   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3533   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3534   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3535   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3536   M = header[1]; N = header[2]; nz = header[3];
3537 
3538   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3539   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3540 
3541   /*
3542      This code adds extra rows to make sure the number of rows is
3543     divisible by the blocksize
3544   */
3545   mbs        = M/bs;
3546   extra_rows = bs - M + bs*(mbs);
3547   if (extra_rows == bs) extra_rows = 0;
3548   else                  mbs++;
3549   if (extra_rows) {
3550     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3551   }
3552 
3553   /* Set global sizes if not already set */
3554   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3555     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3556   } else { /* Check if the matrix global sizes are correct */
3557     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3558     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3559       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3560     }
3561     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3562   }
3563 
3564   /* read in row lengths */
3565   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3566   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3567   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3568 
3569   /* read in column indices */
3570   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3571   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3572   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3573 
3574   /* loop over row lengths determining block row lengths */
3575   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3576   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3577   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3578   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3579   rowcount = 0;
3580   nzcount = 0;
3581   for (i=0; i<mbs; i++) {
3582     nmask = 0;
3583     for (j=0; j<bs; j++) {
3584       kmax = rowlengths[rowcount];
3585       for (k=0; k<kmax; k++) {
3586         tmp = jj[nzcount++]/bs;
3587         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3588       }
3589       rowcount++;
3590     }
3591     browlengths[i] += nmask;
3592     /* zero out the mask elements we set */
3593     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3594   }
3595 
3596   /* Do preallocation  */
3597   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3598   a = (Mat_SeqBAIJ*)newmat->data;
3599 
3600   /* set matrix "i" values */
3601   a->i[0] = 0;
3602   for (i=1; i<= mbs; i++) {
3603     a->i[i]      = a->i[i-1] + browlengths[i-1];
3604     a->ilen[i-1] = browlengths[i-1];
3605   }
3606   a->nz         = 0;
3607   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3608 
3609   /* read in nonzero values */
3610   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3611   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3612   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3613 
3614   /* set "a" and "j" values into matrix */
3615   nzcount = 0; jcount = 0;
3616   for (i=0; i<mbs; i++) {
3617     nzcountb = nzcount;
3618     nmask    = 0;
3619     for (j=0; j<bs; j++) {
3620       kmax = rowlengths[i*bs+j];
3621       for (k=0; k<kmax; k++) {
3622         tmp = jj[nzcount++]/bs;
3623 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3624       }
3625     }
3626     /* sort the masked values */
3627     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3628 
3629     /* set "j" values into matrix */
3630     maskcount = 1;
3631     for (j=0; j<nmask; j++) {
3632       a->j[jcount++]  = masked[j];
3633       mask[masked[j]] = maskcount++;
3634     }
3635     /* set "a" values into matrix */
3636     ishift = bs2*a->i[i];
3637     for (j=0; j<bs; j++) {
3638       kmax = rowlengths[i*bs+j];
3639       for (k=0; k<kmax; k++) {
3640         tmp       = jj[nzcountb]/bs ;
3641         block     = mask[tmp] - 1;
3642         point     = jj[nzcountb] - bs*tmp;
3643         idx       = ishift + bs2*block + j + bs*point;
3644         a->a[idx] = (MatScalar)aa[nzcountb++];
3645       }
3646     }
3647     /* zero out the mask elements we set */
3648     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3649   }
3650   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3651 
3652   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3653   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3654   ierr = PetscFree(aa);CHKERRQ(ierr);
3655   ierr = PetscFree(jj);CHKERRQ(ierr);
3656   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3657 
3658   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3659   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3660   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3661   PetscFunctionReturn(0);
3662 }
3663 
3664 #undef __FUNCT__
3665 #define __FUNCT__ "MatCreateSeqBAIJ"
3666 /*@C
3667    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3668    compressed row) format.  For good matrix assembly performance the
3669    user should preallocate the matrix storage by setting the parameter nz
3670    (or the array nnz).  By setting these parameters accurately, performance
3671    during matrix assembly can be increased by more than a factor of 50.
3672 
3673    Collective on MPI_Comm
3674 
3675    Input Parameters:
3676 +  comm - MPI communicator, set to PETSC_COMM_SELF
3677 .  bs - size of block
3678 .  m - number of rows
3679 .  n - number of columns
3680 .  nz - number of nonzero blocks  per block row (same for all rows)
3681 -  nnz - array containing the number of nonzero blocks in the various block rows
3682          (possibly different for each block row) or PETSC_NULL
3683 
3684    Output Parameter:
3685 .  A - the matrix
3686 
3687    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3688    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3689    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3690 
3691    Options Database Keys:
3692 .   -mat_no_unroll - uses code that does not unroll the loops in the
3693                      block calculations (much slower)
3694 .    -mat_block_size - size of the blocks to use
3695 
3696    Level: intermediate
3697 
3698    Notes:
3699    The number of rows and columns must be divisible by blocksize.
3700 
3701    If the nnz parameter is given then the nz parameter is ignored
3702 
3703    A nonzero block is any block that as 1 or more nonzeros in it
3704 
3705    The block AIJ format is fully compatible with standard Fortran 77
3706    storage.  That is, the stored row and column indices can begin at
3707    either one (as in Fortran) or zero.  See the users' manual for details.
3708 
3709    Specify the preallocated storage with either nz or nnz (not both).
3710    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3711    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3712    matrices.
3713 
3714 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3715 @*/
3716 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3717 {
3718   PetscErrorCode ierr;
3719 
3720   PetscFunctionBegin;
3721   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3722   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3723   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3724   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3725   PetscFunctionReturn(0);
3726 }
3727 
3728 #undef __FUNCT__
3729 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3730 /*@C
3731    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3732    per row in the matrix. For good matrix assembly performance the
3733    user should preallocate the matrix storage by setting the parameter nz
3734    (or the array nnz).  By setting these parameters accurately, performance
3735    during matrix assembly can be increased by more than a factor of 50.
3736 
3737    Collective on MPI_Comm
3738 
3739    Input Parameters:
3740 +  A - the matrix
3741 .  bs - size of block
3742 .  nz - number of block nonzeros per block row (same for all rows)
3743 -  nnz - array containing the number of block nonzeros in the various block rows
3744          (possibly different for each block row) or PETSC_NULL
3745 
3746    Options Database Keys:
3747 .   -mat_no_unroll - uses code that does not unroll the loops in the
3748                      block calculations (much slower)
3749 .    -mat_block_size - size of the blocks to use
3750 
3751    Level: intermediate
3752 
3753    Notes:
3754    If the nnz parameter is given then the nz parameter is ignored
3755 
3756    You can call MatGetInfo() to get information on how effective the preallocation was;
3757    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3758    You can also run with the option -info and look for messages with the string
3759    malloc in them to see if additional memory allocation was needed.
3760 
3761    The block AIJ format is fully compatible with standard Fortran 77
3762    storage.  That is, the stored row and column indices can begin at
3763    either one (as in Fortran) or zero.  See the users' manual for details.
3764 
3765    Specify the preallocated storage with either nz or nnz (not both).
3766    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3767    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3768 
3769 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3770 @*/
3771 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3772 {
3773   PetscErrorCode ierr;
3774 
3775   PetscFunctionBegin;
3776   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3777   PetscValidType(B,1);
3778   PetscValidLogicalCollectiveInt(B,bs,2);
3779   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3780   PetscFunctionReturn(0);
3781 }
3782 
3783 #undef __FUNCT__
3784 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3785 /*@C
3786    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3787    (the default sequential PETSc format).
3788 
3789    Collective on MPI_Comm
3790 
3791    Input Parameters:
3792 +  A - the matrix
3793 .  i - the indices into j for the start of each local row (starts with zero)
3794 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3795 -  v - optional values in the matrix
3796 
3797    Level: developer
3798 
3799 .keywords: matrix, aij, compressed row, sparse
3800 
3801 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3802 @*/
3803 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3804 {
3805   PetscErrorCode ierr;
3806 
3807   PetscFunctionBegin;
3808   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3809   PetscValidType(B,1);
3810   PetscValidLogicalCollectiveInt(B,bs,2);
3811   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3812   PetscFunctionReturn(0);
3813 }
3814 
3815 
3816 #undef __FUNCT__
3817 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3818 /*@
3819      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3820 
3821      Collective on MPI_Comm
3822 
3823    Input Parameters:
3824 +  comm - must be an MPI communicator of size 1
3825 .  bs - size of block
3826 .  m - number of rows
3827 .  n - number of columns
3828 .  i - row indices
3829 .  j - column indices
3830 -  a - matrix values
3831 
3832    Output Parameter:
3833 .  mat - the matrix
3834 
3835    Level: advanced
3836 
3837    Notes:
3838        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3839     once the matrix is destroyed
3840 
3841        You cannot set new nonzero locations into this matrix, that will generate an error.
3842 
3843        The i and j indices are 0 based
3844 
3845        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3846 
3847 
3848 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3849 
3850 @*/
3851 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3852 {
3853   PetscErrorCode ierr;
3854   PetscInt       ii;
3855   Mat_SeqBAIJ    *baij;
3856 
3857   PetscFunctionBegin;
3858   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3859   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3860 
3861   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3862   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3863   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3864   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3865   baij = (Mat_SeqBAIJ*)(*mat)->data;
3866   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3867   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3868 
3869   baij->i = i;
3870   baij->j = j;
3871   baij->a = a;
3872   baij->singlemalloc = PETSC_FALSE;
3873   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3874   baij->free_a       = PETSC_FALSE;
3875   baij->free_ij      = PETSC_FALSE;
3876 
3877   for (ii=0; ii<m; ii++) {
3878     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3879 #if defined(PETSC_USE_DEBUG)
3880     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3881 #endif
3882   }
3883 #if defined(PETSC_USE_DEBUG)
3884   for (ii=0; ii<baij->i[m]; ii++) {
3885     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3886     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3887   }
3888 #endif
3889 
3890   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3891   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3892   PetscFunctionReturn(0);
3893 }
3894