xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2f4102e2ffe0d79384eeea78255736a9b8146751)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269 	break;
1270       }
1271     }
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1278 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       i,j,m = a->mbs;
1283 
1284   PetscFunctionBegin;
1285   if (!a->diag) {
1286     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1287     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1288     a->free_diag = PETSC_TRUE;
1289   }
1290   for (i=0; i<m; i++) {
1291     a->diag[i] = a->i[i+1];
1292     for (j=a->i[i]; j<a->i[i+1]; j++) {
1293       if (a->j[j] == i) {
1294         a->diag[i] = j;
1295         break;
1296       }
1297     }
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 
1303 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       *tia, *tja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (ja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353 	    }
1354           }
1355         }
1356       }
1357     }
1358 
1359     n     *= bs;
1360     nz *= bs*bs;
1361     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1362       ierr = PetscFree(tia);CHKERRQ(ierr);
1363       ierr = PetscFree(tja);CHKERRQ(ierr);
1364     }
1365   } else if (oshift == 1) {
1366     if (symmetric) {
1367       PetscInt nz = tia[A->rmap->n/bs];
1368       /*  add 1 to i and j indices */
1369       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1370       *ia = tia;
1371       if (ja) {
1372 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1373         *ja = tja;
1374       }
1375     } else {
1376       PetscInt nz = a->i[A->rmap->n/bs];
1377       /* malloc space and  add 1 to i and j indices */
1378       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1379       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1380       if (ja) {
1381 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1382 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1383       }
1384     }
1385   } else {
1386     *ia = tia;
1387     if (ja) *ja = tja;
1388   }
1389 
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 #undef __FUNCT__
1394 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1395 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1396 {
1397   PetscErrorCode ierr;
1398 
1399   PetscFunctionBegin;
1400   if (!ia) PetscFunctionReturn(0);
1401   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1402     ierr = PetscFree(*ia);CHKERRQ(ierr);
1403     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1410 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1411 {
1412   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1413   PetscErrorCode ierr;
1414 
1415   PetscFunctionBegin;
1416 #if defined(PETSC_USE_LOG)
1417   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1418 #endif
1419   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1420   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1421   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1422   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1423   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1424   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1425   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1426   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1427   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1428   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1429   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1430   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1431   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1432 
1433   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1434   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1435   ierr = PetscFree(A->data);CHKERRQ(ierr);
1436 
1437   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1447   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 
1451 #undef __FUNCT__
1452 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1453 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1454 {
1455   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1456   PetscErrorCode ierr;
1457 
1458   PetscFunctionBegin;
1459   switch (op) {
1460   case MAT_ROW_ORIENTED:
1461     a->roworiented    = flg;
1462     break;
1463   case MAT_KEEP_NONZERO_PATTERN:
1464     a->keepnonzeropattern = flg;
1465     break;
1466   case MAT_NEW_NONZERO_LOCATIONS:
1467     a->nonew          = (flg ? 0 : 1);
1468     break;
1469   case MAT_NEW_NONZERO_LOCATION_ERR:
1470     a->nonew          = (flg ? -1 : 0);
1471     break;
1472   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1473     a->nonew          = (flg ? -2 : 0);
1474     break;
1475   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1476     a->nounused       = (flg ? -1 : 0);
1477     break;
1478   case MAT_CHECK_COMPRESSED_ROW:
1479     a->compressedrow.check = flg;
1480     break;
1481   case MAT_NEW_DIAGONALS:
1482   case MAT_IGNORE_OFF_PROC_ENTRIES:
1483   case MAT_USE_HASH_TABLE:
1484     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1485     break;
1486   case MAT_SYMMETRIC:
1487   case MAT_STRUCTURALLY_SYMMETRIC:
1488   case MAT_HERMITIAN:
1489   case MAT_SYMMETRY_ETERNAL:
1490     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1491     break;
1492   default:
1493     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1494   }
1495   PetscFunctionReturn(0);
1496 }
1497 
1498 #undef __FUNCT__
1499 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1500 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1501 {
1502   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode ierr;
1504   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1505   MatScalar      *aa,*aa_i;
1506   PetscScalar    *v_i;
1507 
1508   PetscFunctionBegin;
1509   bs  = A->rmap->bs;
1510   ai  = a->i;
1511   aj  = a->j;
1512   aa  = a->a;
1513   bs2 = a->bs2;
1514 
1515   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1516 
1517   bn  = row/bs;   /* Block number */
1518   bp  = row % bs; /* Block Position */
1519   M   = ai[bn+1] - ai[bn];
1520   *nz = bs*M;
1521 
1522   if (v) {
1523     *v = 0;
1524     if (*nz) {
1525       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1526       for (i=0; i<M; i++) { /* for each block in the block row */
1527         v_i  = *v + i*bs;
1528         aa_i = aa + bs2*(ai[bn] + i);
1529         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1530       }
1531     }
1532   }
1533 
1534   if (idx) {
1535     *idx = 0;
1536     if (*nz) {
1537       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1538       for (i=0; i<M; i++) { /* for each block in the block row */
1539         idx_i = *idx + i*bs;
1540         itmp  = bs*aj[ai[bn] + i];
1541         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1542       }
1543     }
1544   }
1545   PetscFunctionReturn(0);
1546 }
1547 
1548 #undef __FUNCT__
1549 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1550 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1551 {
1552   PetscErrorCode ierr;
1553 
1554   PetscFunctionBegin;
1555   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1556   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1557   PetscFunctionReturn(0);
1558 }
1559 
1560 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1561 
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1564 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1565 {
1566   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1567   Mat            C;
1568   PetscErrorCode ierr;
1569   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1570   PetscInt       *rows,*cols,bs2=a->bs2;
1571   MatScalar      *array;
1572 
1573   PetscFunctionBegin;
1574   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1575   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1576     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1577     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1578 
1579     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1580     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1581     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1582     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1583     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1584     ierr = PetscFree(col);CHKERRQ(ierr);
1585   } else {
1586     C = *B;
1587   }
1588 
1589   array = a->a;
1590   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1591   for (i=0; i<mbs; i++) {
1592     cols[0] = i*bs;
1593     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1594     len = ai[i+1] - ai[i];
1595     for (j=0; j<len; j++) {
1596       rows[0] = (*aj++)*bs;
1597       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1598       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1599       array += bs2;
1600     }
1601   }
1602   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1603 
1604   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1605   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1606 
1607   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1608     *B = C;
1609   } else {
1610     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1611   }
1612   PetscFunctionReturn(0);
1613 }
1614 
1615 EXTERN_C_BEGIN
1616 #undef __FUNCT__
1617 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1618 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1619 {
1620   PetscErrorCode ierr;
1621   Mat            Btrans;
1622 
1623   PetscFunctionBegin;
1624   *f = PETSC_FALSE;
1625   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1626   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1627   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1628   PetscFunctionReturn(0);
1629 }
1630 EXTERN_C_END
1631 
1632 #undef __FUNCT__
1633 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1634 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1635 {
1636   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1637   PetscErrorCode ierr;
1638   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1639   int            fd;
1640   PetscScalar    *aa;
1641   FILE           *file;
1642 
1643   PetscFunctionBegin;
1644   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1645   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1646   col_lens[0] = MAT_FILE_CLASSID;
1647 
1648   col_lens[1] = A->rmap->N;
1649   col_lens[2] = A->cmap->n;
1650   col_lens[3] = a->nz*bs2;
1651 
1652   /* store lengths of each row and write (including header) to file */
1653   count = 0;
1654   for (i=0; i<a->mbs; i++) {
1655     for (j=0; j<bs; j++) {
1656       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1657     }
1658   }
1659   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1660   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1661 
1662   /* store column indices (zero start index) */
1663   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1664   count = 0;
1665   for (i=0; i<a->mbs; i++) {
1666     for (j=0; j<bs; j++) {
1667       for (k=a->i[i]; k<a->i[i+1]; k++) {
1668         for (l=0; l<bs; l++) {
1669           jj[count++] = bs*a->j[k] + l;
1670         }
1671       }
1672     }
1673   }
1674   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1675   ierr = PetscFree(jj);CHKERRQ(ierr);
1676 
1677   /* store nonzero values */
1678   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1679   count = 0;
1680   for (i=0; i<a->mbs; i++) {
1681     for (j=0; j<bs; j++) {
1682       for (k=a->i[i]; k<a->i[i+1]; k++) {
1683         for (l=0; l<bs; l++) {
1684           aa[count++] = a->a[bs2*k + l*bs + j];
1685         }
1686       }
1687     }
1688   }
1689   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1690   ierr = PetscFree(aa);CHKERRQ(ierr);
1691 
1692   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1693   if (file) {
1694     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1701 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1702 {
1703   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1704   PetscErrorCode    ierr;
1705   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1706   PetscViewerFormat format;
1707 
1708   PetscFunctionBegin;
1709   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1710   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1711     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1712   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1713     Mat aij;
1714     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1715     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1716     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1717   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1718      PetscFunctionReturn(0);
1719   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1720     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1721     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1722     for (i=0; i<a->mbs; i++) {
1723       for (j=0; j<bs; j++) {
1724         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1725         for (k=a->i[i]; k<a->i[i+1]; k++) {
1726           for (l=0; l<bs; l++) {
1727 #if defined(PETSC_USE_COMPLEX)
1728             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1729               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1730                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1731             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1733                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1736             }
1737 #else
1738             if (a->a[bs2*k + l*bs + j] != 0.0) {
1739               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1740             }
1741 #endif
1742           }
1743         }
1744         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1745       }
1746     }
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1748   } else {
1749     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1750     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1751     for (i=0; i<a->mbs; i++) {
1752       for (j=0; j<bs; j++) {
1753         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1754         for (k=a->i[i]; k<a->i[i+1]; k++) {
1755           for (l=0; l<bs; l++) {
1756 #if defined(PETSC_USE_COMPLEX)
1757             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1758               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1759                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1760             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1761               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1762                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             } else {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1765             }
1766 #else
1767             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1768 #endif
1769           }
1770         }
1771         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1772       }
1773     }
1774     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1775   }
1776   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1782 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1783 {
1784   Mat            A = (Mat) Aa;
1785   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1786   PetscErrorCode ierr;
1787   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1788   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1789   MatScalar      *aa;
1790   PetscViewer    viewer;
1791   PetscViewerFormat format;
1792 
1793   PetscFunctionBegin;
1794   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1795   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1796 
1797   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1798 
1799   /* loop over matrix elements drawing boxes */
1800 
1801   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1802     color = PETSC_DRAW_BLUE;
1803     for (i=0,row=0; i<mbs; i++,row+=bs) {
1804       for (j=a->i[i]; j<a->i[i+1]; j++) {
1805         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1806         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1807         aa = a->a + j*bs2;
1808         for (k=0; k<bs; k++) {
1809           for (l=0; l<bs; l++) {
1810             if (PetscRealPart(*aa++) >=  0.) continue;
1811             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1812           }
1813         }
1814       }
1815     }
1816     color = PETSC_DRAW_CYAN;
1817     for (i=0,row=0; i<mbs; i++,row+=bs) {
1818       for (j=a->i[i]; j<a->i[i+1]; j++) {
1819         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1820         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1821         aa = a->a + j*bs2;
1822         for (k=0; k<bs; k++) {
1823           for (l=0; l<bs; l++) {
1824             if (PetscRealPart(*aa++) != 0.) continue;
1825             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1826           }
1827         }
1828       }
1829     }
1830     color = PETSC_DRAW_RED;
1831     for (i=0,row=0; i<mbs; i++,row+=bs) {
1832       for (j=a->i[i]; j<a->i[i+1]; j++) {
1833         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1834         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1835         aa = a->a + j*bs2;
1836         for (k=0; k<bs; k++) {
1837           for (l=0; l<bs; l++) {
1838             if (PetscRealPart(*aa++) <= 0.) continue;
1839             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1840           }
1841         }
1842       }
1843     }
1844   } else {
1845     /* use contour shading to indicate magnitude of values */
1846     /* first determine max of all nonzero values */
1847     PetscDraw   popup;
1848     PetscReal scale,maxv = 0.0;
1849 
1850     for (i=0; i<a->nz*a->bs2; i++) {
1851       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1852     }
1853     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1854     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1855     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1856     for (i=0,row=0; i<mbs; i++,row+=bs) {
1857       for (j=a->i[i]; j<a->i[i+1]; j++) {
1858         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1859         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1860         aa = a->a + j*bs2;
1861         for (k=0; k<bs; k++) {
1862           for (l=0; l<bs; l++) {
1863             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1864             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1865           }
1866         }
1867       }
1868     }
1869   }
1870   PetscFunctionReturn(0);
1871 }
1872 
1873 #undef __FUNCT__
1874 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1875 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1876 {
1877   PetscErrorCode ierr;
1878   PetscReal      xl,yl,xr,yr,w,h;
1879   PetscDraw      draw;
1880   PetscBool      isnull;
1881 
1882   PetscFunctionBegin;
1883 
1884   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1885   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1886 
1887   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1888   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1889   xr += w;    yr += h;  xl = -w;     yl = -h;
1890   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1891   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1892   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 #undef __FUNCT__
1897 #define __FUNCT__ "MatView_SeqBAIJ"
1898 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1899 {
1900   PetscErrorCode ierr;
1901   PetscBool      iascii,isbinary,isdraw;
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1905   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1906   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1907   if (iascii){
1908     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1909   } else if (isbinary) {
1910     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1911   } else if (isdraw) {
1912     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1913   } else {
1914     Mat B;
1915     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1916     ierr = MatView(B,viewer);CHKERRQ(ierr);
1917     ierr = MatDestroy(&B);CHKERRQ(ierr);
1918   }
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 
1923 #undef __FUNCT__
1924 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1925 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1926 {
1927   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1928   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1929   PetscInt    *ai = a->i,*ailen = a->ilen;
1930   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1931   MatScalar   *ap,*aa = a->a;
1932 
1933   PetscFunctionBegin;
1934   for (k=0; k<m; k++) { /* loop over rows */
1935     row  = im[k]; brow = row/bs;
1936     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1937     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1938     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1939     nrow = ailen[brow];
1940     for (l=0; l<n; l++) { /* loop over columns */
1941       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1942       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1943       col  = in[l] ;
1944       bcol = col/bs;
1945       cidx = col%bs;
1946       ridx = row%bs;
1947       high = nrow;
1948       low  = 0; /* assume unsorted */
1949       while (high-low > 5) {
1950         t = (low+high)/2;
1951         if (rp[t] > bcol) high = t;
1952         else             low  = t;
1953       }
1954       for (i=low; i<high; i++) {
1955         if (rp[i] > bcol) break;
1956         if (rp[i] == bcol) {
1957           *v++ = ap[bs2*i+bs*cidx+ridx];
1958           goto finished;
1959         }
1960       }
1961       *v++ = 0.0;
1962       finished:;
1963     }
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1970 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1971 {
1972   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1973   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1974   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1975   PetscErrorCode    ierr;
1976   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1977   PetscBool         roworiented=a->roworiented;
1978   const PetscScalar *value = v;
1979   MatScalar         *ap,*aa = a->a,*bap;
1980 
1981   PetscFunctionBegin;
1982   if (roworiented) {
1983     stepval = (n-1)*bs;
1984   } else {
1985     stepval = (m-1)*bs;
1986   }
1987   for (k=0; k<m; k++) { /* loop over added rows */
1988     row  = im[k];
1989     if (row < 0) continue;
1990 #if defined(PETSC_USE_DEBUG)
1991     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1992 #endif
1993     rp   = aj + ai[row];
1994     ap   = aa + bs2*ai[row];
1995     rmax = imax[row];
1996     nrow = ailen[row];
1997     low  = 0;
1998     high = nrow;
1999     for (l=0; l<n; l++) { /* loop over added columns */
2000       if (in[l] < 0) continue;
2001 #if defined(PETSC_USE_DEBUG)
2002       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2003 #endif
2004       col = in[l];
2005       if (roworiented) {
2006         value = v + (k*(stepval+bs) + l)*bs;
2007       } else {
2008         value = v + (l*(stepval+bs) + k)*bs;
2009       }
2010       if (col <= lastcol) low = 0; else high = nrow;
2011       lastcol = col;
2012       while (high-low > 7) {
2013         t = (low+high)/2;
2014         if (rp[t] > col) high = t;
2015         else             low  = t;
2016       }
2017       for (i=low; i<high; i++) {
2018         if (rp[i] > col) break;
2019         if (rp[i] == col) {
2020           bap  = ap +  bs2*i;
2021           if (roworiented) {
2022             if (is == ADD_VALUES) {
2023               for (ii=0; ii<bs; ii++,value+=stepval) {
2024                 for (jj=ii; jj<bs2; jj+=bs) {
2025                   bap[jj] += *value++;
2026                 }
2027               }
2028             } else {
2029               for (ii=0; ii<bs; ii++,value+=stepval) {
2030                 for (jj=ii; jj<bs2; jj+=bs) {
2031                   bap[jj] = *value++;
2032                 }
2033               }
2034             }
2035           } else {
2036             if (is == ADD_VALUES) {
2037               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2038                 for (jj=0; jj<bs; jj++) {
2039                   bap[jj] += value[jj];
2040                 }
2041                 bap += bs;
2042               }
2043             } else {
2044               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2045                 for (jj=0; jj<bs; jj++) {
2046                   bap[jj]  = value[jj];
2047                 }
2048                 bap += bs;
2049               }
2050             }
2051           }
2052           goto noinsert2;
2053         }
2054       }
2055       if (nonew == 1) goto noinsert2;
2056       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2057       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2058       N = nrow++ - 1; high++;
2059       /* shift up all the later entries in this row */
2060       for (ii=N; ii>=i; ii--) {
2061         rp[ii+1] = rp[ii];
2062         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2063       }
2064       if (N >= i) {
2065         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2066       }
2067       rp[i] = col;
2068       bap   = ap +  bs2*i;
2069       if (roworiented) {
2070         for (ii=0; ii<bs; ii++,value+=stepval) {
2071           for (jj=ii; jj<bs2; jj+=bs) {
2072             bap[jj] = *value++;
2073           }
2074         }
2075       } else {
2076         for (ii=0; ii<bs; ii++,value+=stepval) {
2077           for (jj=0; jj<bs; jj++) {
2078             *bap++  = *value++;
2079           }
2080         }
2081       }
2082       noinsert2:;
2083       low = i;
2084     }
2085     ailen[row] = nrow;
2086   }
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #undef __FUNCT__
2091 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2092 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2093 {
2094   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2095   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2096   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2097   PetscErrorCode ierr;
2098   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2099   MatScalar      *aa = a->a,*ap;
2100   PetscReal      ratio=0.6;
2101 
2102   PetscFunctionBegin;
2103   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2104 
2105   if (m) rmax = ailen[0];
2106   for (i=1; i<mbs; i++) {
2107     /* move each row back by the amount of empty slots (fshift) before it*/
2108     fshift += imax[i-1] - ailen[i-1];
2109     rmax   = PetscMax(rmax,ailen[i]);
2110     if (fshift) {
2111       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2112       N = ailen[i];
2113       for (j=0; j<N; j++) {
2114         ip[j-fshift] = ip[j];
2115         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2116       }
2117     }
2118     ai[i] = ai[i-1] + ailen[i-1];
2119   }
2120   if (mbs) {
2121     fshift += imax[mbs-1] - ailen[mbs-1];
2122     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2123   }
2124   /* reset ilen and imax for each row */
2125   for (i=0; i<mbs; i++) {
2126     ailen[i] = imax[i] = ai[i+1] - ai[i];
2127   }
2128   a->nz = ai[mbs];
2129 
2130   /* diagonals may have moved, so kill the diagonal pointers */
2131   a->idiagvalid = PETSC_FALSE;
2132   if (fshift && a->diag) {
2133     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2134     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2135     a->diag = 0;
2136   }
2137   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2138   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2139   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2140   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2141   A->info.mallocs     += a->reallocs;
2142   a->reallocs          = 0;
2143   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2144 
2145   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2146   A->same_nonzero = PETSC_TRUE;
2147   PetscFunctionReturn(0);
2148 }
2149 
2150 /*
2151    This function returns an array of flags which indicate the locations of contiguous
2152    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2153    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2154    Assume: sizes should be long enough to hold all the values.
2155 */
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2158 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2159 {
2160   PetscInt   i,j,k,row;
2161   PetscBool  flg;
2162 
2163   PetscFunctionBegin;
2164   for (i=0,j=0; i<n; j++) {
2165     row = idx[i];
2166     if (row%bs!=0) { /* Not the begining of a block */
2167       sizes[j] = 1;
2168       i++;
2169     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2170       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2171       i++;
2172     } else { /* Begining of the block, so check if the complete block exists */
2173       flg = PETSC_TRUE;
2174       for (k=1; k<bs; k++) {
2175         if (row+k != idx[i+k]) { /* break in the block */
2176           flg = PETSC_FALSE;
2177           break;
2178         }
2179       }
2180       if (flg) { /* No break in the bs */
2181         sizes[j] = bs;
2182         i+= bs;
2183       } else {
2184         sizes[j] = 1;
2185         i++;
2186       }
2187     }
2188   }
2189   *bs_max = j;
2190   PetscFunctionReturn(0);
2191 }
2192 
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2195 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2196 {
2197   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2198   PetscErrorCode    ierr;
2199   PetscInt          i,j,k,count,*rows;
2200   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2201   PetscScalar       zero = 0.0;
2202   MatScalar         *aa;
2203   const PetscScalar *xx;
2204   PetscScalar       *bb;
2205 
2206   PetscFunctionBegin;
2207   /* fix right hand side if needed */
2208   if (x && b) {
2209     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2210     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2211     for (i=0; i<is_n; i++) {
2212       bb[is_idx[i]] = diag*xx[is_idx[i]];
2213     }
2214     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2215     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2216   }
2217 
2218   /* Make a copy of the IS and  sort it */
2219   /* allocate memory for rows,sizes */
2220   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2221 
2222   /* copy IS values to rows, and sort them */
2223   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2224   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2225 
2226   if (baij->keepnonzeropattern) {
2227     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2228     bs_max = is_n;
2229     A->same_nonzero = PETSC_TRUE;
2230   } else {
2231     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2232     A->same_nonzero = PETSC_FALSE;
2233   }
2234 
2235   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2236     row   = rows[j];
2237     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2238     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2239     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2240     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2241       if (diag != (PetscScalar)0.0) {
2242         if (baij->ilen[row/bs] > 0) {
2243           baij->ilen[row/bs]       = 1;
2244           baij->j[baij->i[row/bs]] = row/bs;
2245           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2246         }
2247         /* Now insert all the diagonal values for this bs */
2248         for (k=0; k<bs; k++) {
2249           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2250         }
2251       } else { /* (diag == 0.0) */
2252         baij->ilen[row/bs] = 0;
2253       } /* end (diag == 0.0) */
2254     } else { /* (sizes[i] != bs) */
2255 #if defined (PETSC_USE_DEBUG)
2256       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2257 #endif
2258       for (k=0; k<count; k++) {
2259         aa[0] =  zero;
2260         aa    += bs;
2261       }
2262       if (diag != (PetscScalar)0.0) {
2263         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2264       }
2265     }
2266   }
2267 
2268   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2269   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 #undef __FUNCT__
2274 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2275 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2276 {
2277   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2278   PetscErrorCode    ierr;
2279   PetscInt          i,j,k,count;
2280   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2281   PetscScalar       zero = 0.0;
2282   MatScalar         *aa;
2283   const PetscScalar *xx;
2284   PetscScalar       *bb;
2285   PetscBool         *zeroed,vecs = PETSC_FALSE;
2286 
2287   PetscFunctionBegin;
2288   /* fix right hand side if needed */
2289   if (x && b) {
2290     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2291     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2292     vecs = PETSC_TRUE;
2293   }
2294   A->same_nonzero = PETSC_TRUE;
2295 
2296   /* zero the columns */
2297   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2298   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2299   for (i=0; i<is_n; i++) {
2300     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2301     zeroed[is_idx[i]] = PETSC_TRUE;
2302   }
2303   for (i=0; i<A->rmap->N; i++) {
2304     if (!zeroed[i]) {
2305       row = i/bs;
2306       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2307         for (k=0; k<bs; k++) {
2308           col = bs*baij->j[j] + k;
2309 	  if (zeroed[col]) {
2310 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2311             if (vecs) bb[i] -= aa[0]*xx[col];
2312             aa[0] = 0.0;
2313           }
2314         }
2315       }
2316     } else if (vecs) bb[i] = diag*xx[i];
2317   }
2318   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2319   if (vecs) {
2320     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2321     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2322   }
2323 
2324   /* zero the rows */
2325   for (i=0; i<is_n; i++) {
2326     row   = is_idx[i];
2327     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2328     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2329     for (k=0; k<count; k++) {
2330       aa[0] =  zero;
2331       aa    += bs;
2332     }
2333     if (diag != (PetscScalar)0.0) {
2334       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2335     }
2336   }
2337   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2338   PetscFunctionReturn(0);
2339 }
2340 
2341 #undef __FUNCT__
2342 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2343 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2344 {
2345   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2346   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2347   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2348   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2349   PetscErrorCode ierr;
2350   PetscInt       ridx,cidx,bs2=a->bs2;
2351   PetscBool      roworiented=a->roworiented;
2352   MatScalar      *ap,value,*aa=a->a,*bap;
2353 
2354   PetscFunctionBegin;
2355   if (v) PetscValidScalarPointer(v,6);
2356   for (k=0; k<m; k++) { /* loop over added rows */
2357     row  = im[k];
2358     brow = row/bs;
2359     if (row < 0) continue;
2360 #if defined(PETSC_USE_DEBUG)
2361     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2362 #endif
2363     rp   = aj + ai[brow];
2364     ap   = aa + bs2*ai[brow];
2365     rmax = imax[brow];
2366     nrow = ailen[brow];
2367     low  = 0;
2368     high = nrow;
2369     for (l=0; l<n; l++) { /* loop over added columns */
2370       if (in[l] < 0) continue;
2371 #if defined(PETSC_USE_DEBUG)
2372       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2373 #endif
2374       col = in[l]; bcol = col/bs;
2375       ridx = row % bs; cidx = col % bs;
2376       if (roworiented) {
2377         value = v[l + k*n];
2378       } else {
2379         value = v[k + l*m];
2380       }
2381       if (col <= lastcol) low = 0; else high = nrow;
2382       lastcol = col;
2383       while (high-low > 7) {
2384         t = (low+high)/2;
2385         if (rp[t] > bcol) high = t;
2386         else              low  = t;
2387       }
2388       for (i=low; i<high; i++) {
2389         if (rp[i] > bcol) break;
2390         if (rp[i] == bcol) {
2391           bap  = ap +  bs2*i + bs*cidx + ridx;
2392           if (is == ADD_VALUES) *bap += value;
2393           else                  *bap  = value;
2394           goto noinsert1;
2395         }
2396       }
2397       if (nonew == 1) goto noinsert1;
2398       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2399       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2400       N = nrow++ - 1; high++;
2401       /* shift up all the later entries in this row */
2402       for (ii=N; ii>=i; ii--) {
2403         rp[ii+1] = rp[ii];
2404         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2405       }
2406       if (N>=i) {
2407         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2408       }
2409       rp[i]                      = bcol;
2410       ap[bs2*i + bs*cidx + ridx] = value;
2411       a->nz++;
2412       noinsert1:;
2413       low = i;
2414     }
2415     ailen[brow] = nrow;
2416   }
2417   A->same_nonzero = PETSC_FALSE;
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2423 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2426   Mat            outA;
2427   PetscErrorCode ierr;
2428   PetscBool      row_identity,col_identity;
2429 
2430   PetscFunctionBegin;
2431   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2432   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2433   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2434   if (!row_identity || !col_identity) {
2435     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2436   }
2437 
2438   outA            = inA;
2439   inA->factortype = MAT_FACTOR_LU;
2440 
2441   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2442 
2443   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2444   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2445   a->row = row;
2446   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2447   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2448   a->col = col;
2449 
2450   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2451   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2452    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2453   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2454 
2455   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2456   if (!a->solve_work) {
2457     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2458     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2459   }
2460   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2461 
2462   PetscFunctionReturn(0);
2463 }
2464 
2465 EXTERN_C_BEGIN
2466 #undef __FUNCT__
2467 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2468 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2469 {
2470   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2471   PetscInt    i,nz,mbs;
2472 
2473   PetscFunctionBegin;
2474   nz  = baij->maxnz;
2475   mbs = baij->mbs;
2476   for (i=0; i<nz; i++) {
2477     baij->j[i] = indices[i];
2478   }
2479   baij->nz = nz;
2480   for (i=0; i<mbs; i++) {
2481     baij->ilen[i] = baij->imax[i];
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 EXTERN_C_END
2486 
2487 #undef __FUNCT__
2488 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2489 /*@
2490     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2491        in the matrix.
2492 
2493   Input Parameters:
2494 +  mat - the SeqBAIJ matrix
2495 -  indices - the column indices
2496 
2497   Level: advanced
2498 
2499   Notes:
2500     This can be called if you have precomputed the nonzero structure of the
2501   matrix and want to provide it to the matrix object to improve the performance
2502   of the MatSetValues() operation.
2503 
2504     You MUST have set the correct numbers of nonzeros per row in the call to
2505   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2506 
2507     MUST be called before any calls to MatSetValues();
2508 
2509 @*/
2510 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2511 {
2512   PetscErrorCode ierr;
2513 
2514   PetscFunctionBegin;
2515   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2516   PetscValidPointer(indices,2);
2517   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2523 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2524 {
2525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2526   PetscErrorCode ierr;
2527   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2528   PetscReal      atmp;
2529   PetscScalar    *x,zero = 0.0;
2530   MatScalar      *aa;
2531   PetscInt       ncols,brow,krow,kcol;
2532 
2533   PetscFunctionBegin;
2534   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2535   bs   = A->rmap->bs;
2536   aa   = a->a;
2537   ai   = a->i;
2538   aj   = a->j;
2539   mbs  = a->mbs;
2540 
2541   ierr = VecSet(v,zero);CHKERRQ(ierr);
2542   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2543   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2544   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2545   for (i=0; i<mbs; i++) {
2546     ncols = ai[1] - ai[0]; ai++;
2547     brow  = bs*i;
2548     for (j=0; j<ncols; j++){
2549       for (kcol=0; kcol<bs; kcol++){
2550         for (krow=0; krow<bs; krow++){
2551           atmp = PetscAbsScalar(*aa);aa++;
2552           row = brow + krow;    /* row index */
2553           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2554           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2555         }
2556       }
2557       aj++;
2558     }
2559   }
2560   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatCopy_SeqBAIJ"
2566 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2567 {
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   /* If the two matrices have the same copy implementation, use fast copy. */
2572   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2573     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2574     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2575     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2576 
2577     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2578     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2579     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2580   } else {
2581     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2582   }
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2588 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2589 {
2590   PetscErrorCode ierr;
2591 
2592   PetscFunctionBegin;
2593   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2594   PetscFunctionReturn(0);
2595 }
2596 
2597 #undef __FUNCT__
2598 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2599 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2600 {
2601   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2602   PetscFunctionBegin;
2603   *array = a->a;
2604   PetscFunctionReturn(0);
2605 }
2606 
2607 #undef __FUNCT__
2608 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2609 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2610 {
2611   PetscFunctionBegin;
2612   PetscFunctionReturn(0);
2613 }
2614 
2615 #undef __FUNCT__
2616 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2617 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2618 {
2619   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2620   PetscErrorCode ierr;
2621   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2622   PetscBLASInt   one=1;
2623 
2624   PetscFunctionBegin;
2625   if (str == SAME_NONZERO_PATTERN) {
2626     PetscScalar alpha = a;
2627     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2628     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2629   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2630     if (y->xtoy && y->XtoY != X) {
2631       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2632       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2633     }
2634     if (!y->xtoy) { /* get xtoy */
2635       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2636       y->XtoY = X;
2637       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2638     }
2639     for (i=0; i<x->nz; i++) {
2640       j = 0;
2641       while (j < bs2){
2642         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2643         j++;
2644       }
2645     }
2646     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2647   } else {
2648     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2649   }
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2655 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2656 {
2657   PetscInt rbs,cbs;
2658   PetscErrorCode ierr;
2659 
2660   PetscFunctionBegin;
2661   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2662   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2663   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2664   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2665   PetscFunctionReturn(0);
2666 }
2667 
2668 #undef __FUNCT__
2669 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2670 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2671 {
2672   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2673   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2674   MatScalar      *aa = a->a;
2675 
2676   PetscFunctionBegin;
2677   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2678   PetscFunctionReturn(0);
2679 }
2680 
2681 #undef __FUNCT__
2682 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2683 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2684 {
2685   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2686   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2687   MatScalar      *aa = a->a;
2688 
2689   PetscFunctionBegin;
2690   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2695 
2696 #undef __FUNCT__
2697 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2698 /*
2699     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2700 */
2701 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2702 {
2703   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2704   PetscErrorCode ierr;
2705   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2706   PetscInt       nz = a->i[m],row,*jj,mr,col;
2707 
2708   PetscFunctionBegin;
2709   *nn = n;
2710   if (!ia) PetscFunctionReturn(0);
2711   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2712   else {
2713     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2714     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2715     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2716     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2717     jj = a->j;
2718     for (i=0; i<nz; i++) {
2719       collengths[jj[i]]++;
2720     }
2721     cia[0] = oshift;
2722     for (i=0; i<n; i++) {
2723       cia[i+1] = cia[i] + collengths[i];
2724     }
2725     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2726     jj   = a->j;
2727     for (row=0; row<m; row++) {
2728       mr = a->i[row+1] - a->i[row];
2729       for (i=0; i<mr; i++) {
2730         col = *jj++;
2731         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2732       }
2733     }
2734     ierr = PetscFree(collengths);CHKERRQ(ierr);
2735     *ia = cia; *ja = cja;
2736   }
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2742 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2743 {
2744   PetscErrorCode ierr;
2745 
2746   PetscFunctionBegin;
2747   if (!ia) PetscFunctionReturn(0);
2748   ierr = PetscFree(*ia);CHKERRQ(ierr);
2749   ierr = PetscFree(*ja);CHKERRQ(ierr);
2750   PetscFunctionReturn(0);
2751 }
2752 
2753 #undef __FUNCT__
2754 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2755 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2756 {
2757   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2758   PetscErrorCode ierr;
2759   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2760   PetscScalar    dx,*y,*xx,*w3_array;
2761   PetscScalar    *vscale_array;
2762   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2763   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2764   void           *fctx = coloring->fctx;
2765   PetscBool      flg = PETSC_FALSE;
2766   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2767   Vec            x1_tmp;
2768 
2769   PetscFunctionBegin;
2770   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2771   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2772   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2773   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2774 
2775   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2776   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2777   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2778   if (flg) {
2779     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2780   } else {
2781     PetscBool  assembled;
2782     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2783     if (assembled) {
2784       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2785     }
2786   }
2787 
2788   x1_tmp = x1;
2789   if (!coloring->vscale){
2790     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2791   }
2792 
2793   /*
2794     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2795     coloring->F for the coarser grids from the finest
2796   */
2797   if (coloring->F) {
2798     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2799     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2800     if (m1 != m2) {
2801       coloring->F = 0;
2802       }
2803     }
2804 
2805   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2806     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2807   }
2808   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2809 
2810   /* Set w1 = F(x1) */
2811   if (coloring->F) {
2812     w1          = coloring->F; /* use already computed value of function */
2813     coloring->F = 0;
2814   } else {
2815     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2816     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2817     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2818   }
2819 
2820   if (!coloring->w3) {
2821     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2822     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2823   }
2824   w3 = coloring->w3;
2825 
2826     CHKMEMQ;
2827     /* Compute all the local scale factors, including ghost points */
2828   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2829   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2830   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2831   if (ctype == IS_COLORING_GHOSTED){
2832     col_start = 0; col_end = N;
2833   } else if (ctype == IS_COLORING_GLOBAL){
2834     xx = xx - start;
2835     vscale_array = vscale_array - start;
2836     col_start = start; col_end = N + start;
2837   }    CHKMEMQ;
2838   for (col=col_start; col<col_end; col++){
2839     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2840     if (coloring->htype[0] == 'w') {
2841       dx = 1.0 + unorm;
2842     } else {
2843       dx  = xx[col];
2844     }
2845     if (dx == (PetscScalar)0.0) dx = 1.0;
2846 #if !defined(PETSC_USE_COMPLEX)
2847     if (dx < umin && dx >= 0.0)      dx = umin;
2848     else if (dx < 0.0 && dx > -umin) dx = -umin;
2849 #else
2850     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2851     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2852 #endif
2853     dx               *= epsilon;
2854     vscale_array[col] = (PetscScalar)1.0/dx;
2855   }     CHKMEMQ;
2856   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2857   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2858   if (ctype == IS_COLORING_GLOBAL){
2859     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2860     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2861   }
2862   CHKMEMQ;
2863   if (coloring->vscaleforrow) {
2864     vscaleforrow = coloring->vscaleforrow;
2865   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2866 
2867   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2868   /*
2869     Loop over each color
2870   */
2871   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2872   for (k=0; k<coloring->ncolors; k++) {
2873     coloring->currentcolor = k;
2874     for (i=0; i<bs; i++) {
2875       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2876       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2877       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2878       /*
2879 	Loop over each column associated with color
2880 	adding the perturbation to the vector w3.
2881       */
2882       for (l=0; l<coloring->ncolumns[k]; l++) {
2883 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2884 	if (coloring->htype[0] == 'w') {
2885 	  dx = 1.0 + unorm;
2886 	} else {
2887 	  dx  = xx[col];
2888 	}
2889 	if (dx == (PetscScalar)0.0) dx = 1.0;
2890 #if !defined(PETSC_USE_COMPLEX)
2891 	if (dx < umin && dx >= 0.0)      dx = umin;
2892 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2893 #else
2894 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2895 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2896 #endif
2897 	dx            *= epsilon;
2898 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2899 	w3_array[col] += dx;
2900       }
2901       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2902       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2903 
2904       /*
2905 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2906 	w2 = F(x1 + dx) - F(x1)
2907       */
2908       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2909       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2910       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2911       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2912 
2913       /*
2914 	Loop over rows of vector, putting results into Jacobian matrix
2915       */
2916       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2917       for (l=0; l<coloring->nrows[k]; l++) {
2918 	row    = bs*coloring->rows[k][l];             /* local row index */
2919 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2920         for (j=0; j<bs; j++) {
2921   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2922           srows[j]  = row + start + j;
2923         }
2924 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2925       }
2926       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2927     }
2928   } /* endof for each color */
2929   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2930   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2931   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2932   ierr = PetscFree(srows);CHKERRQ(ierr);
2933 
2934   coloring->currentcolor = -1;
2935   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2936   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2937   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2938   PetscFunctionReturn(0);
2939 }
2940 
2941 /* -------------------------------------------------------------------*/
2942 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2943        MatGetRow_SeqBAIJ,
2944        MatRestoreRow_SeqBAIJ,
2945        MatMult_SeqBAIJ_N,
2946 /* 4*/ MatMultAdd_SeqBAIJ_N,
2947        MatMultTranspose_SeqBAIJ,
2948        MatMultTransposeAdd_SeqBAIJ,
2949        0,
2950        0,
2951        0,
2952 /*10*/ 0,
2953        MatLUFactor_SeqBAIJ,
2954        0,
2955        0,
2956        MatTranspose_SeqBAIJ,
2957 /*15*/ MatGetInfo_SeqBAIJ,
2958        MatEqual_SeqBAIJ,
2959        MatGetDiagonal_SeqBAIJ,
2960        MatDiagonalScale_SeqBAIJ,
2961        MatNorm_SeqBAIJ,
2962 /*20*/ 0,
2963        MatAssemblyEnd_SeqBAIJ,
2964        MatSetOption_SeqBAIJ,
2965        MatZeroEntries_SeqBAIJ,
2966 /*24*/ MatZeroRows_SeqBAIJ,
2967        0,
2968        0,
2969        0,
2970        0,
2971 /*29*/ MatSetUp_SeqBAIJ,
2972        0,
2973        0,
2974        MatGetArray_SeqBAIJ,
2975        MatRestoreArray_SeqBAIJ,
2976 /*34*/ MatDuplicate_SeqBAIJ,
2977        0,
2978        0,
2979        MatILUFactor_SeqBAIJ,
2980        0,
2981 /*39*/ MatAXPY_SeqBAIJ,
2982        MatGetSubMatrices_SeqBAIJ,
2983        MatIncreaseOverlap_SeqBAIJ,
2984        MatGetValues_SeqBAIJ,
2985        MatCopy_SeqBAIJ,
2986 /*44*/ 0,
2987        MatScale_SeqBAIJ,
2988        0,
2989        0,
2990        MatZeroRowsColumns_SeqBAIJ,
2991 /*49*/ MatSetBlockSize_SeqBAIJ,
2992        MatGetRowIJ_SeqBAIJ,
2993        MatRestoreRowIJ_SeqBAIJ,
2994        MatGetColumnIJ_SeqBAIJ,
2995        MatRestoreColumnIJ_SeqBAIJ,
2996 /*54*/ MatFDColoringCreate_SeqAIJ,
2997        0,
2998        0,
2999        0,
3000        MatSetValuesBlocked_SeqBAIJ,
3001 /*59*/ MatGetSubMatrix_SeqBAIJ,
3002        MatDestroy_SeqBAIJ,
3003        MatView_SeqBAIJ,
3004        0,
3005        0,
3006 /*64*/ 0,
3007        0,
3008        0,
3009        0,
3010        0,
3011 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3012        0,
3013        MatConvert_Basic,
3014        0,
3015        0,
3016 /*74*/ 0,
3017        MatFDColoringApply_BAIJ,
3018        0,
3019        0,
3020        0,
3021 /*79*/ 0,
3022        0,
3023        0,
3024        0,
3025        MatLoad_SeqBAIJ,
3026 /*84*/ 0,
3027        0,
3028        0,
3029        0,
3030        0,
3031 /*89*/ 0,
3032        0,
3033        0,
3034        0,
3035        0,
3036 /*94*/ 0,
3037        0,
3038        0,
3039        0,
3040        0,
3041 /*99*/0,
3042        0,
3043        0,
3044        0,
3045        0,
3046 /*104*/0,
3047        MatRealPart_SeqBAIJ,
3048        MatImaginaryPart_SeqBAIJ,
3049        0,
3050        0,
3051 /*109*/0,
3052        0,
3053        0,
3054        0,
3055        MatMissingDiagonal_SeqBAIJ,
3056 /*114*/0,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*119*/0,
3062        0,
3063        MatMultHermitianTranspose_SeqBAIJ,
3064        MatMultHermitianTransposeAdd_SeqBAIJ,
3065        0,
3066 /*124*/0,
3067        0,
3068        MatInvertBlockDiagonal_SeqBAIJ
3069 };
3070 
3071 EXTERN_C_BEGIN
3072 #undef __FUNCT__
3073 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3074 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3075 {
3076   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3077   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3078   PetscErrorCode ierr;
3079 
3080   PetscFunctionBegin;
3081   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3082 
3083   /* allocate space for values if not already there */
3084   if (!aij->saved_values) {
3085     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3086     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3087   }
3088 
3089   /* copy values over */
3090   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3091   PetscFunctionReturn(0);
3092 }
3093 EXTERN_C_END
3094 
3095 EXTERN_C_BEGIN
3096 #undef __FUNCT__
3097 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3098 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3099 {
3100   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3101   PetscErrorCode ierr;
3102   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3103 
3104   PetscFunctionBegin;
3105   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3106   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3107 
3108   /* copy values over */
3109   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3110   PetscFunctionReturn(0);
3111 }
3112 EXTERN_C_END
3113 
3114 EXTERN_C_BEGIN
3115 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3116 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3117 EXTERN_C_END
3118 
3119 EXTERN_C_BEGIN
3120 #undef __FUNCT__
3121 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3122 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3123 {
3124   Mat_SeqBAIJ    *b;
3125   PetscErrorCode ierr;
3126   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3127   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3128 
3129   PetscFunctionBegin;
3130   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3131   if (nz == MAT_SKIP_ALLOCATION) {
3132     skipallocation = PETSC_TRUE;
3133     nz             = 0;
3134   }
3135 
3136   if (bs < 0) {
3137     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3138       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3139     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3140     bs   = PetscAbs(bs);
3141   }
3142   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3143   bs   = newbs;
3144 
3145   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3146   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3147   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3148   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3149 
3150   B->preallocated = PETSC_TRUE;
3151 
3152   mbs  = B->rmap->n/bs;
3153   nbs  = B->cmap->n/bs;
3154   bs2  = bs*bs;
3155 
3156   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3157 
3158   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3159   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3160   if (nnz) {
3161     for (i=0; i<mbs; i++) {
3162       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3163       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3164     }
3165   }
3166 
3167   b       = (Mat_SeqBAIJ*)B->data;
3168   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3169     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3170   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3171 
3172   if (!flg) {
3173     switch (bs) {
3174     case 1:
3175       B->ops->mult            = MatMult_SeqBAIJ_1;
3176       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3177       B->ops->sor             = MatSOR_SeqBAIJ_1;
3178       break;
3179     case 2:
3180       B->ops->mult            = MatMult_SeqBAIJ_2;
3181       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3182       B->ops->sor             = MatSOR_SeqBAIJ_2;
3183       break;
3184     case 3:
3185       B->ops->mult            = MatMult_SeqBAIJ_3;
3186       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3187       B->ops->sor             = MatSOR_SeqBAIJ_3;
3188       break;
3189     case 4:
3190       B->ops->mult            = MatMult_SeqBAIJ_4;
3191       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3192       B->ops->sor             = MatSOR_SeqBAIJ_4;
3193       break;
3194     case 5:
3195       B->ops->mult            = MatMult_SeqBAIJ_5;
3196       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3197       B->ops->sor             = MatSOR_SeqBAIJ_5;
3198       break;
3199     case 6:
3200       B->ops->mult            = MatMult_SeqBAIJ_6;
3201       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3202       B->ops->sor             = MatSOR_SeqBAIJ_6;
3203       break;
3204     case 7:
3205       B->ops->mult            = MatMult_SeqBAIJ_7;
3206       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3207       B->ops->sor             = MatSOR_SeqBAIJ_7;
3208       break;
3209     case 15:
3210       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3211       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3212       B->ops->sor             = MatSOR_SeqBAIJ_N;
3213       break;
3214     default:
3215       B->ops->mult            = MatMult_SeqBAIJ_N;
3216       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3217       B->ops->sor             = MatSOR_SeqBAIJ_N;
3218       break;
3219     }
3220   }
3221   B->rmap->bs  = bs;
3222   b->mbs       = mbs;
3223   b->nbs       = nbs;
3224   if (!skipallocation) {
3225     if (!b->imax) {
3226       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3227       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3228       b->free_imax_ilen = PETSC_TRUE;
3229     }
3230     /* b->ilen will count nonzeros in each block row so far. */
3231     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3232     if (!nnz) {
3233       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3234       else if (nz < 0) nz = 1;
3235       for (i=0; i<mbs; i++) b->imax[i] = nz;
3236       nz = nz*mbs;
3237     } else {
3238       nz = 0;
3239       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3240     }
3241 
3242     /* allocate the matrix space */
3243     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3244     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3245     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3246     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3247     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3248     b->singlemalloc = PETSC_TRUE;
3249     b->i[0] = 0;
3250     for (i=1; i<mbs+1; i++) {
3251       b->i[i] = b->i[i-1] + b->imax[i-1];
3252     }
3253     b->free_a     = PETSC_TRUE;
3254     b->free_ij    = PETSC_TRUE;
3255   } else {
3256     b->free_a     = PETSC_FALSE;
3257     b->free_ij    = PETSC_FALSE;
3258   }
3259 
3260   B->rmap->bs          = bs;
3261   b->bs2              = bs2;
3262   b->mbs              = mbs;
3263   b->nz               = 0;
3264   b->maxnz            = nz;
3265   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3266   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3267   PetscFunctionReturn(0);
3268 }
3269 EXTERN_C_END
3270 
3271 EXTERN_C_BEGIN
3272 #undef __FUNCT__
3273 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3274 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3275 {
3276   PetscInt       i,m,nz,nz_max=0,*nnz;
3277   PetscScalar    *values=0;
3278   PetscErrorCode ierr;
3279 
3280   PetscFunctionBegin;
3281   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3282   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3283   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3284   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3285   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3286   m = B->rmap->n/bs;
3287 
3288   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3289   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3290   for(i=0; i<m; i++) {
3291     nz = ii[i+1]- ii[i];
3292     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3293     nz_max = PetscMax(nz_max, nz);
3294     nnz[i] = nz;
3295   }
3296   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3297   ierr = PetscFree(nnz);CHKERRQ(ierr);
3298 
3299   values = (PetscScalar*)V;
3300   if (!values) {
3301     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3302     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3303   }
3304   for (i=0; i<m; i++) {
3305     PetscInt          ncols  = ii[i+1] - ii[i];
3306     const PetscInt    *icols = jj + ii[i];
3307     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3308     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3309   }
3310   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3311   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3313   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 EXTERN_C_END
3317 
3318 
3319 EXTERN_C_BEGIN
3320 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3321 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3322 #if defined(PETSC_HAVE_MUMPS)
3323 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3324 #endif
3325 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3326 EXTERN_C_END
3327 
3328 /*MC
3329    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3330    block sparse compressed row format.
3331 
3332    Options Database Keys:
3333 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3334 
3335   Level: beginner
3336 
3337 .seealso: MatCreateSeqBAIJ()
3338 M*/
3339 
3340 EXTERN_C_BEGIN
3341 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3342 EXTERN_C_END
3343 
3344 EXTERN_C_BEGIN
3345 #undef __FUNCT__
3346 #define __FUNCT__ "MatCreate_SeqBAIJ"
3347 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3348 {
3349   PetscErrorCode ierr;
3350   PetscMPIInt    size;
3351   Mat_SeqBAIJ    *b;
3352 
3353   PetscFunctionBegin;
3354   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3355   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3356 
3357   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3358   B->data = (void*)b;
3359   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3360   b->row                   = 0;
3361   b->col                   = 0;
3362   b->icol                  = 0;
3363   b->reallocs              = 0;
3364   b->saved_values          = 0;
3365 
3366   b->roworiented           = PETSC_TRUE;
3367   b->nonew                 = 0;
3368   b->diag                  = 0;
3369   b->solve_work            = 0;
3370   b->mult_work             = 0;
3371   B->spptr                 = 0;
3372   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3373   b->keepnonzeropattern    = PETSC_FALSE;
3374   b->xtoy                  = 0;
3375   b->XtoY                  = 0;
3376   B->same_nonzero          = PETSC_FALSE;
3377 
3378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3379                                      "MatGetFactorAvailable_seqbaij_petsc",
3380                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3382                                      "MatGetFactor_seqbaij_petsc",
3383                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3384   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3385                                      "MatGetFactor_seqbaij_bstrm",
3386                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3387 #if defined(PETSC_HAVE_MUMPS)
3388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3389 #endif
3390   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3391                                      "MatInvertBlockDiagonal_SeqBAIJ",
3392                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3393   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3394                                      "MatStoreValues_SeqBAIJ",
3395                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3397                                      "MatRetrieveValues_SeqBAIJ",
3398                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3400                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3401                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3403                                      "MatConvert_SeqBAIJ_SeqAIJ",
3404                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3406                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3407                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3409                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3410                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3412                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3413                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3415                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3416                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3417   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3418                                      "MatIsTranspose_SeqBAIJ",
3419                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3420   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3421   PetscFunctionReturn(0);
3422 }
3423 EXTERN_C_END
3424 
3425 #undef __FUNCT__
3426 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3427 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3428 {
3429   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3430   PetscErrorCode ierr;
3431   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3432 
3433   PetscFunctionBegin;
3434   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3435 
3436   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3437     c->imax = a->imax;
3438     c->ilen = a->ilen;
3439     c->free_imax_ilen = PETSC_FALSE;
3440   } else {
3441     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3442     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3443     for (i=0; i<mbs; i++) {
3444       c->imax[i] = a->imax[i];
3445       c->ilen[i] = a->ilen[i];
3446     }
3447     c->free_imax_ilen = PETSC_TRUE;
3448   }
3449 
3450   /* allocate the matrix space */
3451   if (mallocmatspace){
3452     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3453       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3454       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3455       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3456       c->i            = a->i;
3457       c->j            = a->j;
3458       c->singlemalloc = PETSC_FALSE;
3459       c->free_a       = PETSC_TRUE;
3460       c->free_ij      = PETSC_FALSE;
3461       c->parent       = A;
3462       C->preallocated = PETSC_TRUE;
3463       C->assembled    = PETSC_TRUE;
3464       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3465       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3466       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3467     } else {
3468       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3469       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3470       c->singlemalloc = PETSC_TRUE;
3471       c->free_a       = PETSC_TRUE;
3472       c->free_ij      = PETSC_TRUE;
3473       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3474       if (mbs > 0) {
3475 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3476 	if (cpvalues == MAT_COPY_VALUES) {
3477 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3478 	} else {
3479 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3480 	}
3481       }
3482       C->preallocated = PETSC_TRUE;
3483       C->assembled    = PETSC_TRUE;
3484     }
3485   }
3486 
3487   c->roworiented = a->roworiented;
3488   c->nonew       = a->nonew;
3489   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3490   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3491   c->bs2         = a->bs2;
3492   c->mbs         = a->mbs;
3493   c->nbs         = a->nbs;
3494 
3495   if (a->diag) {
3496     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3497       c->diag      = a->diag;
3498       c->free_diag = PETSC_FALSE;
3499     } else {
3500       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3501       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3502       for (i=0; i<mbs; i++) {
3503         c->diag[i] = a->diag[i];
3504       }
3505       c->free_diag = PETSC_TRUE;
3506     }
3507   } else c->diag        = 0;
3508   c->nz                 = a->nz;
3509   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3510   c->solve_work         = 0;
3511   c->mult_work          = 0;
3512 
3513   c->compressedrow.use     = a->compressedrow.use;
3514   c->compressedrow.nrows   = a->compressedrow.nrows;
3515   c->compressedrow.check   = a->compressedrow.check;
3516   if (a->compressedrow.use){
3517     i = a->compressedrow.nrows;
3518     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3519     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3520     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3521     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3522   } else {
3523     c->compressedrow.use    = PETSC_FALSE;
3524     c->compressedrow.i      = PETSC_NULL;
3525     c->compressedrow.rindex = PETSC_NULL;
3526   }
3527   C->same_nonzero = A->same_nonzero;
3528   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3529   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3530   PetscFunctionReturn(0);
3531 }
3532 
3533 #undef __FUNCT__
3534 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3535 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3536 {
3537     PetscErrorCode ierr;
3538 
3539   PetscFunctionBegin;
3540   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3541   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3542   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3543   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3544   PetscFunctionReturn(0);
3545 }
3546 
3547 #undef __FUNCT__
3548 #define __FUNCT__ "MatLoad_SeqBAIJ"
3549 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3550 {
3551   Mat_SeqBAIJ    *a;
3552   PetscErrorCode ierr;
3553   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3554   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3555   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3556   PetscInt       *masked,nmask,tmp,bs2,ishift;
3557   PetscMPIInt    size;
3558   int            fd;
3559   PetscScalar    *aa;
3560   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3561 
3562   PetscFunctionBegin;
3563   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3564   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3565   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3566   bs2  = bs*bs;
3567 
3568   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3569   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3570   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3571   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3572   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3573   M = header[1]; N = header[2]; nz = header[3];
3574 
3575   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3576   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3577 
3578   /*
3579      This code adds extra rows to make sure the number of rows is
3580     divisible by the blocksize
3581   */
3582   mbs        = M/bs;
3583   extra_rows = bs - M + bs*(mbs);
3584   if (extra_rows == bs) extra_rows = 0;
3585   else                  mbs++;
3586   if (extra_rows) {
3587     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3588   }
3589 
3590   /* Set global sizes if not already set */
3591   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3592     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3593   } else { /* Check if the matrix global sizes are correct */
3594     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3595     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3596       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3597     }
3598     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3599   }
3600 
3601   /* read in row lengths */
3602   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3603   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3604   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3605 
3606   /* read in column indices */
3607   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3608   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3609   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3610 
3611   /* loop over row lengths determining block row lengths */
3612   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3613   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3614   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3615   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3616   rowcount = 0;
3617   nzcount = 0;
3618   for (i=0; i<mbs; i++) {
3619     nmask = 0;
3620     for (j=0; j<bs; j++) {
3621       kmax = rowlengths[rowcount];
3622       for (k=0; k<kmax; k++) {
3623         tmp = jj[nzcount++]/bs;
3624         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3625       }
3626       rowcount++;
3627     }
3628     browlengths[i] += nmask;
3629     /* zero out the mask elements we set */
3630     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3631   }
3632 
3633   /* Do preallocation  */
3634   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3635   a = (Mat_SeqBAIJ*)newmat->data;
3636 
3637   /* set matrix "i" values */
3638   a->i[0] = 0;
3639   for (i=1; i<= mbs; i++) {
3640     a->i[i]      = a->i[i-1] + browlengths[i-1];
3641     a->ilen[i-1] = browlengths[i-1];
3642   }
3643   a->nz         = 0;
3644   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3645 
3646   /* read in nonzero values */
3647   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3648   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3649   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3650 
3651   /* set "a" and "j" values into matrix */
3652   nzcount = 0; jcount = 0;
3653   for (i=0; i<mbs; i++) {
3654     nzcountb = nzcount;
3655     nmask    = 0;
3656     for (j=0; j<bs; j++) {
3657       kmax = rowlengths[i*bs+j];
3658       for (k=0; k<kmax; k++) {
3659         tmp = jj[nzcount++]/bs;
3660 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3661       }
3662     }
3663     /* sort the masked values */
3664     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3665 
3666     /* set "j" values into matrix */
3667     maskcount = 1;
3668     for (j=0; j<nmask; j++) {
3669       a->j[jcount++]  = masked[j];
3670       mask[masked[j]] = maskcount++;
3671     }
3672     /* set "a" values into matrix */
3673     ishift = bs2*a->i[i];
3674     for (j=0; j<bs; j++) {
3675       kmax = rowlengths[i*bs+j];
3676       for (k=0; k<kmax; k++) {
3677         tmp       = jj[nzcountb]/bs ;
3678         block     = mask[tmp] - 1;
3679         point     = jj[nzcountb] - bs*tmp;
3680         idx       = ishift + bs2*block + j + bs*point;
3681         a->a[idx] = (MatScalar)aa[nzcountb++];
3682       }
3683     }
3684     /* zero out the mask elements we set */
3685     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3686   }
3687   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3688 
3689   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3690   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3691   ierr = PetscFree(aa);CHKERRQ(ierr);
3692   ierr = PetscFree(jj);CHKERRQ(ierr);
3693   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3694 
3695   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3696   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3697   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3698   PetscFunctionReturn(0);
3699 }
3700 
3701 #undef __FUNCT__
3702 #define __FUNCT__ "MatCreateSeqBAIJ"
3703 /*@C
3704    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3705    compressed row) format.  For good matrix assembly performance the
3706    user should preallocate the matrix storage by setting the parameter nz
3707    (or the array nnz).  By setting these parameters accurately, performance
3708    during matrix assembly can be increased by more than a factor of 50.
3709 
3710    Collective on MPI_Comm
3711 
3712    Input Parameters:
3713 +  comm - MPI communicator, set to PETSC_COMM_SELF
3714 .  bs - size of block
3715 .  m - number of rows
3716 .  n - number of columns
3717 .  nz - number of nonzero blocks  per block row (same for all rows)
3718 -  nnz - array containing the number of nonzero blocks in the various block rows
3719          (possibly different for each block row) or PETSC_NULL
3720 
3721    Output Parameter:
3722 .  A - the matrix
3723 
3724    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3725    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3726    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3727 
3728    Options Database Keys:
3729 .   -mat_no_unroll - uses code that does not unroll the loops in the
3730                      block calculations (much slower)
3731 .    -mat_block_size - size of the blocks to use
3732 
3733    Level: intermediate
3734 
3735    Notes:
3736    The number of rows and columns must be divisible by blocksize.
3737 
3738    If the nnz parameter is given then the nz parameter is ignored
3739 
3740    A nonzero block is any block that as 1 or more nonzeros in it
3741 
3742    The block AIJ format is fully compatible with standard Fortran 77
3743    storage.  That is, the stored row and column indices can begin at
3744    either one (as in Fortran) or zero.  See the users' manual for details.
3745 
3746    Specify the preallocated storage with either nz or nnz (not both).
3747    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3748    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3749    matrices.
3750 
3751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3752 @*/
3753 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3754 {
3755   PetscErrorCode ierr;
3756 
3757   PetscFunctionBegin;
3758   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3759   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3760   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3761   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3762   PetscFunctionReturn(0);
3763 }
3764 
3765 #undef __FUNCT__
3766 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3767 /*@C
3768    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3769    per row in the matrix. For good matrix assembly performance the
3770    user should preallocate the matrix storage by setting the parameter nz
3771    (or the array nnz).  By setting these parameters accurately, performance
3772    during matrix assembly can be increased by more than a factor of 50.
3773 
3774    Collective on MPI_Comm
3775 
3776    Input Parameters:
3777 +  A - the matrix
3778 .  bs - size of block
3779 .  nz - number of block nonzeros per block row (same for all rows)
3780 -  nnz - array containing the number of block nonzeros in the various block rows
3781          (possibly different for each block row) or PETSC_NULL
3782 
3783    Options Database Keys:
3784 .   -mat_no_unroll - uses code that does not unroll the loops in the
3785                      block calculations (much slower)
3786 .    -mat_block_size - size of the blocks to use
3787 
3788    Level: intermediate
3789 
3790    Notes:
3791    If the nnz parameter is given then the nz parameter is ignored
3792 
3793    You can call MatGetInfo() to get information on how effective the preallocation was;
3794    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3795    You can also run with the option -info and look for messages with the string
3796    malloc in them to see if additional memory allocation was needed.
3797 
3798    The block AIJ format is fully compatible with standard Fortran 77
3799    storage.  That is, the stored row and column indices can begin at
3800    either one (as in Fortran) or zero.  See the users' manual for details.
3801 
3802    Specify the preallocated storage with either nz or nnz (not both).
3803    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3804    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3805 
3806 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3807 @*/
3808 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3809 {
3810   PetscErrorCode ierr;
3811 
3812   PetscFunctionBegin;
3813   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3814   PetscValidType(B,1);
3815   PetscValidLogicalCollectiveInt(B,bs,2);
3816   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3817   PetscFunctionReturn(0);
3818 }
3819 
3820 #undef __FUNCT__
3821 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3822 /*@C
3823    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3824    (the default sequential PETSc format).
3825 
3826    Collective on MPI_Comm
3827 
3828    Input Parameters:
3829 +  A - the matrix
3830 .  i - the indices into j for the start of each local row (starts with zero)
3831 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3832 -  v - optional values in the matrix
3833 
3834    Level: developer
3835 
3836 .keywords: matrix, aij, compressed row, sparse
3837 
3838 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3839 @*/
3840 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3841 {
3842   PetscErrorCode ierr;
3843 
3844   PetscFunctionBegin;
3845   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3846   PetscValidType(B,1);
3847   PetscValidLogicalCollectiveInt(B,bs,2);
3848   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3849   PetscFunctionReturn(0);
3850 }
3851 
3852 
3853 #undef __FUNCT__
3854 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3855 /*@
3856      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3857 
3858      Collective on MPI_Comm
3859 
3860    Input Parameters:
3861 +  comm - must be an MPI communicator of size 1
3862 .  bs - size of block
3863 .  m - number of rows
3864 .  n - number of columns
3865 .  i - row indices
3866 .  j - column indices
3867 -  a - matrix values
3868 
3869    Output Parameter:
3870 .  mat - the matrix
3871 
3872    Level: advanced
3873 
3874    Notes:
3875        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3876     once the matrix is destroyed
3877 
3878        You cannot set new nonzero locations into this matrix, that will generate an error.
3879 
3880        The i and j indices are 0 based
3881 
3882        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3883 
3884 
3885 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3886 
3887 @*/
3888 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3889 {
3890   PetscErrorCode ierr;
3891   PetscInt       ii;
3892   Mat_SeqBAIJ    *baij;
3893 
3894   PetscFunctionBegin;
3895   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3896   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3897 
3898   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3899   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3900   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3901   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3902   baij = (Mat_SeqBAIJ*)(*mat)->data;
3903   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3904   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3905 
3906   baij->i = i;
3907   baij->j = j;
3908   baij->a = a;
3909   baij->singlemalloc = PETSC_FALSE;
3910   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3911   baij->free_a       = PETSC_FALSE;
3912   baij->free_ij      = PETSC_FALSE;
3913 
3914   for (ii=0; ii<m; ii++) {
3915     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3916 #if defined(PETSC_USE_DEBUG)
3917     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3918 #endif
3919   }
3920 #if defined(PETSC_USE_DEBUG)
3921   for (ii=0; ii<baij->i[m]; ii++) {
3922     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3923     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3924   }
3925 #endif
3926 
3927   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3928   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3929   PetscFunctionReturn(0);
3930 }
3931