xref: /petsc/src/mat/impls/baij/seq/baij.c (revision e27a552be151e08936ff7d65f1f2e8dae4b63b83)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) PetscFunctionReturn(0);
23   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
24   diag_offset = a->diag;
25   if (!a->idiag) {
26     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
27     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
28   }
29   diag    = a->idiag;
30   mdiag   = a->idiag+bs2*mbs;
31   if (values) *values = a->idiag;
32   /* factor and invert each block */
33   switch (bs){
34     case 1:
35       for (i=0; i<mbs; i++) {
36         odiag = v + 1*diag_offset[i];
37         diag[0]  = odiag[0];
38         mdiag[0] = odiag[0];
39         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
40         diag    += 1;
41         mdiag   += 1;
42       }
43       break;
44     case 2:
45       for (i=0; i<mbs; i++) {
46         odiag   = v + 4*diag_offset[i];
47         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
48 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
49 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
50 	diag    += 4;
51 	mdiag   += 4;
52       }
53       break;
54     case 3:
55       for (i=0; i<mbs; i++) {
56         odiag    = v + 9*diag_offset[i];
57         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
58         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
59         diag[8]  = odiag[8];
60         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
61         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
62         mdiag[8] = odiag[8];
63 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
64 	diag    += 9;
65 	mdiag   += 9;
66       }
67       break;
68     case 4:
69       for (i=0; i<mbs; i++) {
70         odiag  = v + 16*diag_offset[i];
71         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
72         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
73 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
74 	diag  += 16;
75 	mdiag += 16;
76       }
77       break;
78     case 5:
79       for (i=0; i<mbs; i++) {
80         odiag = v + 25*diag_offset[i];
81         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
82         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
83 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
84 	diag  += 25;
85 	mdiag += 25;
86       }
87       break;
88     case 6:
89       for (i=0; i<mbs; i++) {
90         odiag = v + 36*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
94 	diag  += 36;
95 	mdiag += 36;
96       }
97       break;
98     case 7:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 49*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
104 	diag  += 49;
105 	mdiag += 49;
106       }
107       break;
108     default:
109       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
110       for (i=0; i<mbs; i++) {
111         odiag = v + bs2*diag_offset[i];
112         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
113         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
114         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
115 	diag  += bs2;
116 	mdiag += bs2;
117       }
118       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
119   }
120   a->idiagvalid = PETSC_TRUE;
121   PetscFunctionReturn(0);
122 }
123 
124 #undef __FUNCT__
125 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
126 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
127 {
128   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
129   PetscScalar        *x,x1,s1;
130   const PetscScalar  *b;
131   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
132   PetscErrorCode     ierr;
133   PetscInt           m = a->mbs,i,i2,nz,j;
134   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
135 
136   PetscFunctionBegin;
137   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
138   its = its*lits;
139   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
140   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
141   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
142   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
143   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
144 
145   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
146 
147   diag  = a->diag;
148   idiag = a->idiag;
149   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
150   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
151 
152   if (flag & SOR_ZERO_INITIAL_GUESS) {
153     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
154       x[0] = b[0]*idiag[0];
155       i2     = 1;
156       idiag += 1;
157       for (i=1; i<m; i++) {
158         v     = aa + ai[i];
159         vi    = aj + ai[i];
160         nz    = diag[i] - ai[i];
161         s1    = b[i2];
162         for (j=0; j<nz; j++) {
163           s1 -= v[j]*x[vi[j]];
164         }
165         x[i2]   = idiag[0]*s1;
166         idiag   += 1;
167         i2      += 1;
168       }
169       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
170       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
171     }
172     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
173         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
174       i2    = 0;
175       mdiag = a->idiag+a->mbs;
176       for (i=0; i<m; i++) {
177         x1      = x[i2];
178         x[i2]   = mdiag[0]*x1;
179         mdiag  += 1;
180         i2     += 1;
181       }
182       ierr = PetscLogFlops(m);CHKERRQ(ierr);
183     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
184       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
185     }
186     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
187       idiag   = a->idiag+a->mbs - 1;
188       i2      = m - 1;
189       x1      = x[i2];
190       x[i2]   = idiag[0]*x1;
191       idiag -= 1;
192       i2    -= 1;
193       for (i=m-2; i>=0; i--) {
194         v     = aa + (diag[i]+1);
195         vi    = aj + diag[i] + 1;
196         nz    = ai[i+1] - diag[i] - 1;
197         s1    = x[i2];
198         for (j=0; j<nz; j++) {
199           s1 -= v[j]*x[vi[j]];
200         }
201         x[i2]   = idiag[0]*s1;
202         idiag   -= 1;
203         i2      -= 1;
204       }
205       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
206     }
207   } else {
208     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
209   }
210   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
211   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
212   PetscFunctionReturn(0);
213 }
214 
215 #undef __FUNCT__
216 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
217 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
218 {
219   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
220   PetscScalar        *x,x1,x2,s1,s2;
221   const PetscScalar  *b;
222   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
223   PetscErrorCode     ierr;
224   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
225   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
226 
227   PetscFunctionBegin;
228   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
229   its = its*lits;
230   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
231   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
232   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
233   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
234   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
235 
236   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
237 
238   diag  = a->diag;
239   idiag = a->idiag;
240   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
241   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
242 
243   if (flag & SOR_ZERO_INITIAL_GUESS) {
244     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
245       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
246       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
247       i2     = 2;
248       idiag += 4;
249       for (i=1; i<m; i++) {
250 	v     = aa + 4*ai[i];
251 	vi    = aj + ai[i];
252 	nz    = diag[i] - ai[i];
253 	s1    = b[i2]; s2 = b[i2+1];
254         for (j=0; j<nz; j++) {
255 	  idx  = 2*vi[j];
256           it   = 4*j;
257 	  x1   = x[idx]; x2 = x[1+idx];
258 	  s1  -= v[it]*x1 + v[it+2]*x2;
259 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
260 	}
261 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
262 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
263         idiag   += 4;
264         i2      += 2;
265       }
266       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
267       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
268     }
269     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
270         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
271       i2    = 0;
272       mdiag = a->idiag+4*a->mbs;
273       for (i=0; i<m; i++) {
274         x1      = x[i2]; x2 = x[i2+1];
275         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
276         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
277         mdiag  += 4;
278         i2     += 2;
279       }
280       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
281     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
282       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
283     }
284     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
285       idiag   = a->idiag+4*a->mbs - 4;
286       i2      = 2*m - 2;
287       x1      = x[i2]; x2 = x[i2+1];
288       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
289       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
290       idiag -= 4;
291       i2    -= 2;
292       for (i=m-2; i>=0; i--) {
293 	v     = aa + 4*(diag[i]+1);
294 	vi    = aj + diag[i] + 1;
295 	nz    = ai[i+1] - diag[i] - 1;
296 	s1    = x[i2]; s2 = x[i2+1];
297         for (j=0; j<nz; j++) {
298  	  idx  = 2*vi[j];
299           it   = 4*j;
300 	  x1   = x[idx]; x2 = x[1+idx];
301 	  s1  -= v[it]*x1 + v[it+2]*x2;
302 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
303 	}
304 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
305 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
306         idiag   -= 4;
307         i2      -= 2;
308       }
309       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
310     }
311   } else {
312     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
313   }
314   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
315   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
316   PetscFunctionReturn(0);
317 }
318 
319 #undef __FUNCT__
320 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
321 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
322 {
323   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
324   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
325   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
326   const PetscScalar  *b;
327   PetscErrorCode     ierr;
328   PetscInt           m = a->mbs,i,i2,nz,idx;
329   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
330 
331   PetscFunctionBegin;
332   its = its*lits;
333   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
334   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
335   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
336   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
337   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
338   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
339 
340   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
341 
342   diag  = a->diag;
343   idiag = a->idiag;
344   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
345   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
346 
347   if (flag & SOR_ZERO_INITIAL_GUESS) {
348     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
349       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
350       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
351       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
352       i2     = 3;
353       idiag += 9;
354       for (i=1; i<m; i++) {
355         v     = aa + 9*ai[i];
356         vi    = aj + ai[i];
357         nz    = diag[i] - ai[i];
358         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
359         while (nz--) {
360           idx  = 3*(*vi++);
361           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
362           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
363           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
364           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
365           v   += 9;
366         }
367         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
368         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
369         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
370         idiag   += 9;
371         i2      += 3;
372       }
373       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
374       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
375     }
376     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
377         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
378       i2    = 0;
379       mdiag = a->idiag+9*a->mbs;
380       for (i=0; i<m; i++) {
381         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
382         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
383         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
384         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
385         mdiag  += 9;
386         i2     += 3;
387       }
388       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
389     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
390       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
391     }
392     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
393       idiag   = a->idiag+9*a->mbs - 9;
394       i2      = 3*m - 3;
395       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
396       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
397       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
398       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
399       idiag -= 9;
400       i2    -= 3;
401       for (i=m-2; i>=0; i--) {
402         v     = aa + 9*(diag[i]+1);
403         vi    = aj + diag[i] + 1;
404         nz    = ai[i+1] - diag[i] - 1;
405         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
406         while (nz--) {
407           idx  = 3*(*vi++);
408           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
409           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
410           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
411           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
412           v   += 9;
413         }
414         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
415         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
416         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
417         idiag   -= 9;
418         i2      -= 3;
419       }
420       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
421     }
422   } else {
423     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
424   }
425   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
426   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
427   PetscFunctionReturn(0);
428 }
429 
430 #undef __FUNCT__
431 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
432 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
433 {
434   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
435   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
436   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
437   const PetscScalar  *b;
438   PetscErrorCode     ierr;
439   PetscInt           m = a->mbs,i,i2,nz,idx;
440   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
441 
442   PetscFunctionBegin;
443   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
444   its = its*lits;
445   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
446   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
447   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
448   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
449   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
450 
451   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
452 
453   diag  = a->diag;
454   idiag = a->idiag;
455   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
456   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
457 
458   if (flag & SOR_ZERO_INITIAL_GUESS) {
459     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
460       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
461       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
462       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
463       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
464       i2     = 4;
465       idiag += 16;
466       for (i=1; i<m; i++) {
467 	v     = aa + 16*ai[i];
468 	vi    = aj + ai[i];
469 	nz    = diag[i] - ai[i];
470 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
471 	while (nz--) {
472 	  idx  = 4*(*vi++);
473 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
474 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
475 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
476 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
477 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
478 	  v   += 16;
479 	}
480 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
481 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
482 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
483 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
484         idiag   += 16;
485         i2      += 4;
486       }
487       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
488       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
489     }
490     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
491         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
492       i2    = 0;
493       mdiag = a->idiag+16*a->mbs;
494       for (i=0; i<m; i++) {
495         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
496         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
497         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
498         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
499         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
500         mdiag  += 16;
501         i2     += 4;
502       }
503       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
504     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
505       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
506     }
507     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
508       idiag   = a->idiag+16*a->mbs - 16;
509       i2      = 4*m - 4;
510       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
511       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
512       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
513       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
514       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
515       idiag -= 16;
516       i2    -= 4;
517       for (i=m-2; i>=0; i--) {
518 	v     = aa + 16*(diag[i]+1);
519 	vi    = aj + diag[i] + 1;
520 	nz    = ai[i+1] - diag[i] - 1;
521 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
522 	while (nz--) {
523 	  idx  = 4*(*vi++);
524 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
525 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
526 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
527 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
528 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
529 	  v   += 16;
530 	}
531 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
532 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
533 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
534 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
535         idiag   -= 16;
536         i2      -= 4;
537       }
538       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
539     }
540   } else {
541     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
542   }
543   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
544   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
545   PetscFunctionReturn(0);
546 }
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
550 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
551 {
552   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
553   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
554   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
555   const PetscScalar  *b;
556   PetscErrorCode     ierr;
557   PetscInt           m = a->mbs,i,i2,nz,idx;
558   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
559 
560   PetscFunctionBegin;
561   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
562   its = its*lits;
563   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
564   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
565   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
566   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
567   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
568 
569   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
570 
571   diag  = a->diag;
572   idiag = a->idiag;
573   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
574   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
575 
576   if (flag & SOR_ZERO_INITIAL_GUESS) {
577     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
578       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
579       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
580       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
581       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
582       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
583       i2     = 5;
584       idiag += 25;
585       for (i=1; i<m; i++) {
586 	v     = aa + 25*ai[i];
587 	vi    = aj + ai[i];
588 	nz    = diag[i] - ai[i];
589 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
590 	while (nz--) {
591 	  idx  = 5*(*vi++);
592 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
593 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
594 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
595 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
596 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
597 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
598 	  v   += 25;
599 	}
600 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
601 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
602 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
603 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
604 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
605         idiag   += 25;
606         i2      += 5;
607       }
608       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
609       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
610     }
611     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
612         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
613       i2    = 0;
614       mdiag = a->idiag+25*a->mbs;
615       for (i=0; i<m; i++) {
616         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
617         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
618         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
619         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
620         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
621         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
622         mdiag  += 25;
623         i2     += 5;
624       }
625       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
626     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
627       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
628     }
629     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
630       idiag   = a->idiag+25*a->mbs - 25;
631       i2      = 5*m - 5;
632       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
633       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
634       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
635       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
636       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
637       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
638       idiag -= 25;
639       i2    -= 5;
640       for (i=m-2; i>=0; i--) {
641 	v     = aa + 25*(diag[i]+1);
642 	vi    = aj + diag[i] + 1;
643 	nz    = ai[i+1] - diag[i] - 1;
644 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
645 	while (nz--) {
646 	  idx  = 5*(*vi++);
647 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
648 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
649 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
650 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
651 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
652 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
653 	  v   += 25;
654 	}
655 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
656 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
657 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
658 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
659 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
660         idiag   -= 25;
661         i2      -= 5;
662       }
663       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
664     }
665   } else {
666     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
667   }
668   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
669   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
670   PetscFunctionReturn(0);
671 }
672 
673 #undef __FUNCT__
674 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
675 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
676 {
677   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
678   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
679   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
680   const PetscScalar  *b;
681   PetscErrorCode     ierr;
682   PetscInt           m = a->mbs,i,i2,nz,idx;
683   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
684 
685   PetscFunctionBegin;
686   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
687   its = its*lits;
688   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
689   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
690   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
691   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
692   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
693 
694   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
695 
696   diag  = a->diag;
697   idiag = a->idiag;
698   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
699   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
700 
701   if (flag & SOR_ZERO_INITIAL_GUESS) {
702     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
703       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
704       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
705       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
706       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
707       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
708       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
709       i2     = 6;
710       idiag += 36;
711       for (i=1; i<m; i++) {
712         v     = aa + 36*ai[i];
713         vi    = aj + ai[i];
714         nz    = diag[i] - ai[i];
715         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
716         while (nz--) {
717           idx  = 6*(*vi++);
718           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
719           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
720           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
721           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
722           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
723           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
724           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
725           v   += 36;
726         }
727         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
728         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
729         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
730         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
731         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
732         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
733         idiag   += 36;
734         i2      += 6;
735       }
736       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
737       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
738     }
739     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
740         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
741       i2    = 0;
742       mdiag = a->idiag+36*a->mbs;
743       for (i=0; i<m; i++) {
744         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
745         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
746         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
747         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
748         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
749         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
750         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
751         mdiag  += 36;
752         i2     += 6;
753       }
754       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
755     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
756       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
757     }
758     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
759       idiag   = a->idiag+36*a->mbs - 36;
760       i2      = 6*m - 6;
761       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
762       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
763       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
764       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
765       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
766       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
767       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
768       idiag -= 36;
769       i2    -= 6;
770       for (i=m-2; i>=0; i--) {
771         v     = aa + 36*(diag[i]+1);
772         vi    = aj + diag[i] + 1;
773         nz    = ai[i+1] - diag[i] - 1;
774         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
775         while (nz--) {
776           idx  = 6*(*vi++);
777           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
778           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
779           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
780           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
781           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
782           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
783           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
784           v   += 36;
785         }
786         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
787         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
788         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
789         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
790         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
791         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
792         idiag   -= 36;
793         i2      -= 6;
794       }
795       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
796     }
797   } else {
798     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
799   }
800   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
801   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
807 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
808 {
809   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
810   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
811   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
812   const PetscScalar  *b;
813   PetscErrorCode     ierr;
814   PetscInt           m = a->mbs,i,i2,nz,idx;
815   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
816 
817   PetscFunctionBegin;
818   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
819   its = its*lits;
820   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
821   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
822   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
823   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
824   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
825 
826   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
827 
828   diag  = a->diag;
829   idiag = a->idiag;
830   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
831   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
832 
833   if (flag & SOR_ZERO_INITIAL_GUESS) {
834     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
835       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
836       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
837       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
838       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
839       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
840       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
841       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
842       i2     = 7;
843       idiag += 49;
844       for (i=1; i<m; i++) {
845         v     = aa + 49*ai[i];
846         vi    = aj + ai[i];
847         nz    = diag[i] - ai[i];
848         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
849         while (nz--) {
850           idx  = 7*(*vi++);
851           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
852           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
853           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
854           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
855           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
856           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
857           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
858           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
859           v   += 49;
860         }
861         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
862         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
863         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
864         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
865         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
866         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
867         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
868         idiag   += 49;
869         i2      += 7;
870       }
871       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
872       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
873     }
874     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
875         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
876       i2    = 0;
877       mdiag = a->idiag+49*a->mbs;
878       for (i=0; i<m; i++) {
879         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
880         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
881         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
882         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
883         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
884         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
885         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
886         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
887         mdiag  += 49;
888         i2     += 7;
889       }
890       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
891     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
892       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
893     }
894     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
895       idiag   = a->idiag+49*a->mbs - 49;
896       i2      = 7*m - 7;
897       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
898       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
899       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
900       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
901       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
902       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
903       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
904       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
905       idiag -= 49;
906       i2    -= 7;
907       for (i=m-2; i>=0; i--) {
908         v     = aa + 49*(diag[i]+1);
909         vi    = aj + diag[i] + 1;
910         nz    = ai[i+1] - diag[i] - 1;
911         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
912         while (nz--) {
913           idx  = 7*(*vi++);
914           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
915           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
916           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
917           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
918           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
919           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
920           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
921           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
922           v   += 49;
923         }
924         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
925         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
926         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
927         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
928         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
929         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
930         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
931         idiag   -= 49;
932         i2      -= 7;
933       }
934       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
935     }
936   } else {
937     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
938   }
939   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
940   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
941   PetscFunctionReturn(0);
942 }
943 
944 #undef __FUNCT__
945 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
946 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
947 {
948   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
949   PetscScalar        *x,*work,*w,*workt;
950   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
951   const PetscScalar  *b;
952   PetscErrorCode     ierr;
953   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
954   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
955 
956   PetscFunctionBegin;
957   its = its*lits;
958   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
959   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
960   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
961   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
962   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
963   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
964 
965   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
966 
967   diag  = a->diag;
968   idiag = a->idiag;
969   if (!a->mult_work) {
970     k    = PetscMax(A->rmap->n,A->cmap->n);
971     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
972   }
973   work = a->mult_work;
974   if (!a->sor_work) {
975     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
976   }
977   w = a->sor_work;
978 
979   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
980   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
981 
982   if (flag & SOR_ZERO_INITIAL_GUESS) {
983     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
984       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
985       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
986       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
987       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
988       i2     = bs;
989       idiag += bs2;
990       for (i=1; i<m; i++) {
991         v     = aa + bs2*ai[i];
992         vi    = aj + ai[i];
993         nz    = diag[i] - ai[i];
994 
995         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
996 	/* copy all rows of x that are needed into contiguous space */
997         workt = work;
998         for (j=0; j<nz; j++) {
999           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1000           workt += bs;
1001         }
1002         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1003        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1004         while (nz--) {
1005           idx  = N*(*vi++);
1006           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1007           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1008           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1009           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1010           v   += N2;
1011 	  } */
1012 
1013         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1014         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1015         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1016         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1017 
1018         idiag   += bs2;
1019         i2      += bs;
1020       }
1021       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1022       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1023     }
1024     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1025         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1026       i2    = 0;
1027       mdiag = a->idiag+bs2*a->mbs;
1028       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1029       for (i=0; i<m; i++) {
1030         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1031         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1032         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1033         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1034         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1035 
1036         mdiag  += bs2;
1037         i2     += bs;
1038       }
1039       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1040     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1041       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1042     }
1043     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1044       idiag   = a->idiag+bs2*a->mbs - bs2;
1045       i2      = bs*m - bs;
1046       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1047       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1048       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1049       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1050       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1051       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1052       idiag -= bs2;
1053       i2    -= bs;
1054       for (i=m-2; i>=0; i--) {
1055         v     = aa + bs2*(diag[i]+1);
1056         vi    = aj + diag[i] + 1;
1057         nz    = ai[i+1] - diag[i] - 1;
1058 
1059         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1060 	/* copy all rows of x that are needed into contiguous space */
1061         workt = work;
1062         for (j=0; j<nz; j++) {
1063           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1064           workt += bs;
1065         }
1066         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1067         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1068         while (nz--) {
1069           idx  = N*(*vi++);
1070           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1071           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1072           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1073           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1074           v   += N2;
1075 	  } */
1076 
1077         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1078         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1079         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1080         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1081         idiag   -= bs2;
1082         i2      -= bs;
1083       }
1084       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1085     }
1086   } else {
1087     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1088   }
1089   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1090   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*
1095     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1096 */
1097 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1098 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1099 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1100 #define matsetvaluesblocked4_ matsetvaluesblocked4
1101 #endif
1102 
1103 EXTERN_C_BEGIN
1104 #undef __FUNCT__
1105 #define __FUNCT__ "matsetvaluesblocked4_"
1106 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1107 {
1108   Mat               A = *AA;
1109   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1110   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1111   PetscInt          *ai=a->i,*ailen=a->ilen;
1112   PetscInt          *aj=a->j,stepval,lastcol = -1;
1113   const PetscScalar *value = v;
1114   MatScalar         *ap,*aa = a->a,*bap;
1115 
1116   PetscFunctionBegin;
1117   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1118   stepval = (n-1)*4;
1119   for (k=0; k<m; k++) { /* loop over added rows */
1120     row  = im[k];
1121     rp   = aj + ai[row];
1122     ap   = aa + 16*ai[row];
1123     nrow = ailen[row];
1124     low  = 0;
1125     high = nrow;
1126     for (l=0; l<n; l++) { /* loop over added columns */
1127       col = in[l];
1128       if (col <= lastcol) low = 0; else high = nrow;
1129       lastcol = col;
1130       value = v + k*(stepval+4 + l)*4;
1131       while (high-low > 7) {
1132         t = (low+high)/2;
1133         if (rp[t] > col) high = t;
1134         else             low  = t;
1135       }
1136       for (i=low; i<high; i++) {
1137         if (rp[i] > col) break;
1138         if (rp[i] == col) {
1139           bap  = ap +  16*i;
1140           for (ii=0; ii<4; ii++,value+=stepval) {
1141             for (jj=ii; jj<16; jj+=4) {
1142               bap[jj] += *value++;
1143             }
1144           }
1145           goto noinsert2;
1146         }
1147       }
1148       N = nrow++ - 1;
1149       high++; /* added new column index thus must search to one higher than before */
1150       /* shift up all the later entries in this row */
1151       for (ii=N; ii>=i; ii--) {
1152         rp[ii+1] = rp[ii];
1153         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1154       }
1155       if (N >= i) {
1156         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1157       }
1158       rp[i] = col;
1159       bap   = ap +  16*i;
1160       for (ii=0; ii<4; ii++,value+=stepval) {
1161         for (jj=ii; jj<16; jj+=4) {
1162           bap[jj] = *value++;
1163         }
1164       }
1165       noinsert2:;
1166       low = i;
1167     }
1168     ailen[row] = nrow;
1169   }
1170   PetscFunctionReturnVoid();
1171 }
1172 EXTERN_C_END
1173 
1174 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1175 #define matsetvalues4_ MATSETVALUES4
1176 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1177 #define matsetvalues4_ matsetvalues4
1178 #endif
1179 
1180 EXTERN_C_BEGIN
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatSetValues4_"
1183 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1184 {
1185   Mat         A = *AA;
1186   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1187   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1188   PetscInt    *ai=a->i,*ailen=a->ilen;
1189   PetscInt    *aj=a->j,brow,bcol;
1190   PetscInt    ridx,cidx,lastcol = -1;
1191   MatScalar   *ap,value,*aa=a->a,*bap;
1192 
1193   PetscFunctionBegin;
1194   for (k=0; k<m; k++) { /* loop over added rows */
1195     row  = im[k]; brow = row/4;
1196     rp   = aj + ai[brow];
1197     ap   = aa + 16*ai[brow];
1198     nrow = ailen[brow];
1199     low  = 0;
1200     high = nrow;
1201     for (l=0; l<n; l++) { /* loop over added columns */
1202       col = in[l]; bcol = col/4;
1203       ridx = row % 4; cidx = col % 4;
1204       value = v[l + k*n];
1205       if (col <= lastcol) low = 0; else high = nrow;
1206       lastcol = col;
1207       while (high-low > 7) {
1208         t = (low+high)/2;
1209         if (rp[t] > bcol) high = t;
1210         else              low  = t;
1211       }
1212       for (i=low; i<high; i++) {
1213         if (rp[i] > bcol) break;
1214         if (rp[i] == bcol) {
1215           bap  = ap +  16*i + 4*cidx + ridx;
1216           *bap += value;
1217           goto noinsert1;
1218         }
1219       }
1220       N = nrow++ - 1;
1221       high++; /* added new column thus must search to one higher than before */
1222       /* shift up all the later entries in this row */
1223       for (ii=N; ii>=i; ii--) {
1224         rp[ii+1] = rp[ii];
1225         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1226       }
1227       if (N>=i) {
1228         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1229       }
1230       rp[i]                    = bcol;
1231       ap[16*i + 4*cidx + ridx] = value;
1232       noinsert1:;
1233       low = i;
1234     }
1235     ailen[brow] = nrow;
1236   }
1237   PetscFunctionReturnVoid();
1238 }
1239 EXTERN_C_END
1240 
1241 /*
1242      Checks for missing diagonals
1243 */
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1246 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1247 {
1248   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1249   PetscErrorCode ierr;
1250   PetscInt       *diag,*jj = a->j,i;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1254   *missing = PETSC_FALSE;
1255   if (A->rmap->n > 0 && !jj) {
1256     *missing  = PETSC_TRUE;
1257     if (d) *d = 0;
1258     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1259   } else {
1260     diag     = a->diag;
1261     for (i=0; i<a->mbs; i++) {
1262       if (jj[diag[i]] != i) {
1263         *missing  = PETSC_TRUE;
1264         if (d) *d = i;
1265         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1266       }
1267     }
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1274 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1275 {
1276   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1277   PetscErrorCode ierr;
1278   PetscInt       i,j,m = a->mbs;
1279 
1280   PetscFunctionBegin;
1281   if (!a->diag) {
1282     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1283     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1284     a->free_diag = PETSC_TRUE;
1285   }
1286   for (i=0; i<m; i++) {
1287     a->diag[i] = a->i[i+1];
1288     for (j=a->i[i]; j<a->i[i+1]; j++) {
1289       if (a->j[j] == i) {
1290         a->diag[i] = j;
1291         break;
1292       }
1293     }
1294   }
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 
1299 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1303 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1304 {
1305   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1306   PetscErrorCode ierr;
1307   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1308   PetscInt       *tia, *tja;
1309 
1310   PetscFunctionBegin;
1311   *nn = n;
1312   if (!ia) PetscFunctionReturn(0);
1313   if (symmetric) {
1314     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1315     nz   = tia[n];
1316   } else {
1317     tia = a->i; tja = a->j;
1318   }
1319 
1320   if (!blockcompressed && bs > 1) {
1321     (*nn) *= bs;
1322     /* malloc & create the natural set of indices */
1323     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1324     if (n) {
1325       (*ia)[0] = 0;
1326       for (j=1; j<bs; j++) {
1327         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1328       }
1329     }
1330 
1331     for (i=1; i<n; i++) {
1332       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1333       for (j=1; j<bs; j++) {
1334         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1335       }
1336     }
1337     if (n) {
1338       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1339     }
1340 
1341     if (ja) {
1342       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1343       cnt = 0;
1344       for (i=0; i<n; i++) {
1345         for (j=0; j<bs; j++) {
1346           for (k=tia[i]; k<tia[i+1]; k++) {
1347             for (l=0; l<bs; l++) {
1348               (*ja)[cnt++] = bs*tja[k] + l;
1349 	    }
1350           }
1351         }
1352       }
1353     }
1354 
1355     n     *= bs;
1356     nz *= bs*bs;
1357     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1358       ierr = PetscFree(tia);CHKERRQ(ierr);
1359       ierr = PetscFree(tja);CHKERRQ(ierr);
1360     }
1361   } else if (oshift == 1) {
1362     if (symmetric) {
1363       PetscInt nz = tia[A->rmap->n/bs];
1364       /*  add 1 to i and j indices */
1365       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1366       *ia = tia;
1367       if (ja) {
1368 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1369         *ja = tja;
1370       }
1371     } else {
1372       PetscInt nz = a->i[A->rmap->n/bs];
1373       /* malloc space and  add 1 to i and j indices */
1374       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1375       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1376       if (ja) {
1377 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1378 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1379       }
1380     }
1381   } else {
1382     *ia = tia;
1383     if (ja) *ja = tja;
1384   }
1385 
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1391 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1392 {
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   if (!ia) PetscFunctionReturn(0);
1397   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1398     ierr = PetscFree(*ia);CHKERRQ(ierr);
1399     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1406 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1407 {
1408   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412 #if defined(PETSC_USE_LOG)
1413   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1414 #endif
1415   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1416   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1417   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1418   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1419   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1420   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1421   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1422   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1423   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1424   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1425   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1426   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1427   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1428 
1429   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1430   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1431   ierr = PetscFree(A->data);CHKERRQ(ierr);
1432 
1433   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1434   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1444   PetscFunctionReturn(0);
1445 }
1446 
1447 #undef __FUNCT__
1448 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1449 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1450 {
1451   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1452   PetscErrorCode ierr;
1453 
1454   PetscFunctionBegin;
1455   switch (op) {
1456   case MAT_ROW_ORIENTED:
1457     a->roworiented    = flg;
1458     break;
1459   case MAT_KEEP_NONZERO_PATTERN:
1460     a->keepnonzeropattern = flg;
1461     break;
1462   case MAT_NEW_NONZERO_LOCATIONS:
1463     a->nonew          = (flg ? 0 : 1);
1464     break;
1465   case MAT_NEW_NONZERO_LOCATION_ERR:
1466     a->nonew          = (flg ? -1 : 0);
1467     break;
1468   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1469     a->nonew          = (flg ? -2 : 0);
1470     break;
1471   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1472     a->nounused       = (flg ? -1 : 0);
1473     break;
1474   case MAT_CHECK_COMPRESSED_ROW:
1475     a->compressedrow.check = flg;
1476     break;
1477   case MAT_NEW_DIAGONALS:
1478   case MAT_IGNORE_OFF_PROC_ENTRIES:
1479   case MAT_USE_HASH_TABLE:
1480     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1481     break;
1482   case MAT_SYMMETRIC:
1483   case MAT_STRUCTURALLY_SYMMETRIC:
1484   case MAT_HERMITIAN:
1485   case MAT_SYMMETRY_ETERNAL:
1486     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1487     break;
1488   default:
1489     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1490   }
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1496 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1497 {
1498   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1499   PetscErrorCode ierr;
1500   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1501   MatScalar      *aa,*aa_i;
1502   PetscScalar    *v_i;
1503 
1504   PetscFunctionBegin;
1505   bs  = A->rmap->bs;
1506   ai  = a->i;
1507   aj  = a->j;
1508   aa  = a->a;
1509   bs2 = a->bs2;
1510 
1511   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1512 
1513   bn  = row/bs;   /* Block number */
1514   bp  = row % bs; /* Block Position */
1515   M   = ai[bn+1] - ai[bn];
1516   *nz = bs*M;
1517 
1518   if (v) {
1519     *v = 0;
1520     if (*nz) {
1521       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1522       for (i=0; i<M; i++) { /* for each block in the block row */
1523         v_i  = *v + i*bs;
1524         aa_i = aa + bs2*(ai[bn] + i);
1525         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1526       }
1527     }
1528   }
1529 
1530   if (idx) {
1531     *idx = 0;
1532     if (*nz) {
1533       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1534       for (i=0; i<M; i++) { /* for each block in the block row */
1535         idx_i = *idx + i*bs;
1536         itmp  = bs*aj[ai[bn] + i];
1537         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1538       }
1539     }
1540   }
1541   PetscFunctionReturn(0);
1542 }
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1546 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1547 {
1548   PetscErrorCode ierr;
1549 
1550   PetscFunctionBegin;
1551   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1552   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1553   PetscFunctionReturn(0);
1554 }
1555 
1556 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1557 
1558 #undef __FUNCT__
1559 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1560 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1561 {
1562   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1563   Mat            C;
1564   PetscErrorCode ierr;
1565   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1566   PetscInt       *rows,*cols,bs2=a->bs2;
1567   MatScalar      *array;
1568 
1569   PetscFunctionBegin;
1570   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1571   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1572     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1573     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1574 
1575     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1576     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1577     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1578     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1579     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1580     ierr = PetscFree(col);CHKERRQ(ierr);
1581   } else {
1582     C = *B;
1583   }
1584 
1585   array = a->a;
1586   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1587   for (i=0; i<mbs; i++) {
1588     cols[0] = i*bs;
1589     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1590     len = ai[i+1] - ai[i];
1591     for (j=0; j<len; j++) {
1592       rows[0] = (*aj++)*bs;
1593       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1594       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1595       array += bs2;
1596     }
1597   }
1598   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1599 
1600   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1601   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1602 
1603   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1604     *B = C;
1605   } else {
1606     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1607   }
1608   PetscFunctionReturn(0);
1609 }
1610 
1611 EXTERN_C_BEGIN
1612 #undef __FUNCT__
1613 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1614 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1615 {
1616   PetscErrorCode ierr;
1617   Mat            Btrans;
1618 
1619   PetscFunctionBegin;
1620   *f = PETSC_FALSE;
1621   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1622   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1623   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1624   PetscFunctionReturn(0);
1625 }
1626 EXTERN_C_END
1627 
1628 #undef __FUNCT__
1629 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1630 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1631 {
1632   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1633   PetscErrorCode ierr;
1634   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1635   int            fd;
1636   PetscScalar    *aa;
1637   FILE           *file;
1638 
1639   PetscFunctionBegin;
1640   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1641   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1642   col_lens[0] = MAT_FILE_CLASSID;
1643 
1644   col_lens[1] = A->rmap->N;
1645   col_lens[2] = A->cmap->n;
1646   col_lens[3] = a->nz*bs2;
1647 
1648   /* store lengths of each row and write (including header) to file */
1649   count = 0;
1650   for (i=0; i<a->mbs; i++) {
1651     for (j=0; j<bs; j++) {
1652       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1653     }
1654   }
1655   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1656   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1657 
1658   /* store column indices (zero start index) */
1659   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1660   count = 0;
1661   for (i=0; i<a->mbs; i++) {
1662     for (j=0; j<bs; j++) {
1663       for (k=a->i[i]; k<a->i[i+1]; k++) {
1664         for (l=0; l<bs; l++) {
1665           jj[count++] = bs*a->j[k] + l;
1666         }
1667       }
1668     }
1669   }
1670   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1671   ierr = PetscFree(jj);CHKERRQ(ierr);
1672 
1673   /* store nonzero values */
1674   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1675   count = 0;
1676   for (i=0; i<a->mbs; i++) {
1677     for (j=0; j<bs; j++) {
1678       for (k=a->i[i]; k<a->i[i+1]; k++) {
1679         for (l=0; l<bs; l++) {
1680           aa[count++] = a->a[bs2*k + l*bs + j];
1681         }
1682       }
1683     }
1684   }
1685   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1686   ierr = PetscFree(aa);CHKERRQ(ierr);
1687 
1688   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1689   if (file) {
1690     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1697 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1698 {
1699   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1700   PetscErrorCode    ierr;
1701   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1702   PetscViewerFormat format;
1703 
1704   PetscFunctionBegin;
1705   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1706   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1707     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1708   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1709     Mat aij;
1710     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1711     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1712     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1713   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1714      PetscFunctionReturn(0);
1715   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1716     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1717     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1718     for (i=0; i<a->mbs; i++) {
1719       for (j=0; j<bs; j++) {
1720         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1721         for (k=a->i[i]; k<a->i[i+1]; k++) {
1722           for (l=0; l<bs; l++) {
1723 #if defined(PETSC_USE_COMPLEX)
1724             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1725               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1726                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1727             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1728               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1729                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1730             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1731               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             }
1733 #else
1734             if (a->a[bs2*k + l*bs + j] != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1736             }
1737 #endif
1738           }
1739         }
1740         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1741       }
1742     }
1743     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1744   } else {
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1746     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1747     for (i=0; i<a->mbs; i++) {
1748       for (j=0; j<bs; j++) {
1749         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1750         for (k=a->i[i]; k<a->i[i+1]; k++) {
1751           for (l=0; l<bs; l++) {
1752 #if defined(PETSC_USE_COMPLEX)
1753             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1754               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1755                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1756             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1757               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1758                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1759             } else {
1760               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             }
1762 #else
1763             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1764 #endif
1765           }
1766         }
1767         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1768       }
1769     }
1770     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1771   }
1772   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 #undef __FUNCT__
1777 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1778 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1779 {
1780   Mat            A = (Mat) Aa;
1781   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1782   PetscErrorCode ierr;
1783   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1784   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1785   MatScalar      *aa;
1786   PetscViewer    viewer;
1787   PetscViewerFormat format;
1788 
1789   PetscFunctionBegin;
1790   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1791   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1792 
1793   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1794 
1795   /* loop over matrix elements drawing boxes */
1796 
1797   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1798     color = PETSC_DRAW_BLUE;
1799     for (i=0,row=0; i<mbs; i++,row+=bs) {
1800       for (j=a->i[i]; j<a->i[i+1]; j++) {
1801         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1802         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1803         aa = a->a + j*bs2;
1804         for (k=0; k<bs; k++) {
1805           for (l=0; l<bs; l++) {
1806             if (PetscRealPart(*aa++) >=  0.) continue;
1807             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1808           }
1809         }
1810       }
1811     }
1812     color = PETSC_DRAW_CYAN;
1813     for (i=0,row=0; i<mbs; i++,row+=bs) {
1814       for (j=a->i[i]; j<a->i[i+1]; j++) {
1815         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1816         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1817         aa = a->a + j*bs2;
1818         for (k=0; k<bs; k++) {
1819           for (l=0; l<bs; l++) {
1820             if (PetscRealPart(*aa++) != 0.) continue;
1821             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1822           }
1823         }
1824       }
1825     }
1826     color = PETSC_DRAW_RED;
1827     for (i=0,row=0; i<mbs; i++,row+=bs) {
1828       for (j=a->i[i]; j<a->i[i+1]; j++) {
1829         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1830         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1831         aa = a->a + j*bs2;
1832         for (k=0; k<bs; k++) {
1833           for (l=0; l<bs; l++) {
1834             if (PetscRealPart(*aa++) <= 0.) continue;
1835             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1836           }
1837         }
1838       }
1839     }
1840   } else {
1841     /* use contour shading to indicate magnitude of values */
1842     /* first determine max of all nonzero values */
1843     PetscDraw   popup;
1844     PetscReal scale,maxv = 0.0;
1845 
1846     for (i=0; i<a->nz*a->bs2; i++) {
1847       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1848     }
1849     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1850     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1851     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1852     for (i=0,row=0; i<mbs; i++,row+=bs) {
1853       for (j=a->i[i]; j<a->i[i+1]; j++) {
1854         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1855         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1856         aa = a->a + j*bs2;
1857         for (k=0; k<bs; k++) {
1858           for (l=0; l<bs; l++) {
1859             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1860             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1861           }
1862         }
1863       }
1864     }
1865   }
1866   PetscFunctionReturn(0);
1867 }
1868 
1869 #undef __FUNCT__
1870 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1871 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1872 {
1873   PetscErrorCode ierr;
1874   PetscReal      xl,yl,xr,yr,w,h;
1875   PetscDraw      draw;
1876   PetscBool      isnull;
1877 
1878   PetscFunctionBegin;
1879 
1880   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1881   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1882 
1883   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1884   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1885   xr += w;    yr += h;  xl = -w;     yl = -h;
1886   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1887   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1888   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 #undef __FUNCT__
1893 #define __FUNCT__ "MatView_SeqBAIJ"
1894 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1895 {
1896   PetscErrorCode ierr;
1897   PetscBool      iascii,isbinary,isdraw;
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1901   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1902   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1903   if (iascii){
1904     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1905   } else if (isbinary) {
1906     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1907   } else if (isdraw) {
1908     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1909   } else {
1910     Mat B;
1911     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1912     ierr = MatView(B,viewer);CHKERRQ(ierr);
1913     ierr = MatDestroy(&B);CHKERRQ(ierr);
1914   }
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 
1919 #undef __FUNCT__
1920 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1921 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1922 {
1923   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1924   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1925   PetscInt    *ai = a->i,*ailen = a->ilen;
1926   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1927   MatScalar   *ap,*aa = a->a;
1928 
1929   PetscFunctionBegin;
1930   for (k=0; k<m; k++) { /* loop over rows */
1931     row  = im[k]; brow = row/bs;
1932     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1933     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1934     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1935     nrow = ailen[brow];
1936     for (l=0; l<n; l++) { /* loop over columns */
1937       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1938       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1939       col  = in[l] ;
1940       bcol = col/bs;
1941       cidx = col%bs;
1942       ridx = row%bs;
1943       high = nrow;
1944       low  = 0; /* assume unsorted */
1945       while (high-low > 5) {
1946         t = (low+high)/2;
1947         if (rp[t] > bcol) high = t;
1948         else             low  = t;
1949       }
1950       for (i=low; i<high; i++) {
1951         if (rp[i] > bcol) break;
1952         if (rp[i] == bcol) {
1953           *v++ = ap[bs2*i+bs*cidx+ridx];
1954           goto finished;
1955         }
1956       }
1957       *v++ = 0.0;
1958       finished:;
1959     }
1960   }
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1966 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1967 {
1968   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1969   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1970   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1971   PetscErrorCode    ierr;
1972   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1973   PetscBool         roworiented=a->roworiented;
1974   const PetscScalar *value = v;
1975   MatScalar         *ap,*aa = a->a,*bap;
1976 
1977   PetscFunctionBegin;
1978   if (roworiented) {
1979     stepval = (n-1)*bs;
1980   } else {
1981     stepval = (m-1)*bs;
1982   }
1983   for (k=0; k<m; k++) { /* loop over added rows */
1984     row  = im[k];
1985     if (row < 0) continue;
1986 #if defined(PETSC_USE_DEBUG)
1987     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1988 #endif
1989     rp   = aj + ai[row];
1990     ap   = aa + bs2*ai[row];
1991     rmax = imax[row];
1992     nrow = ailen[row];
1993     low  = 0;
1994     high = nrow;
1995     for (l=0; l<n; l++) { /* loop over added columns */
1996       if (in[l] < 0) continue;
1997 #if defined(PETSC_USE_DEBUG)
1998       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1999 #endif
2000       col = in[l];
2001       if (roworiented) {
2002         value = v + (k*(stepval+bs) + l)*bs;
2003       } else {
2004         value = v + (l*(stepval+bs) + k)*bs;
2005       }
2006       if (col <= lastcol) low = 0; else high = nrow;
2007       lastcol = col;
2008       while (high-low > 7) {
2009         t = (low+high)/2;
2010         if (rp[t] > col) high = t;
2011         else             low  = t;
2012       }
2013       for (i=low; i<high; i++) {
2014         if (rp[i] > col) break;
2015         if (rp[i] == col) {
2016           bap  = ap +  bs2*i;
2017           if (roworiented) {
2018             if (is == ADD_VALUES) {
2019               for (ii=0; ii<bs; ii++,value+=stepval) {
2020                 for (jj=ii; jj<bs2; jj+=bs) {
2021                   bap[jj] += *value++;
2022                 }
2023               }
2024             } else {
2025               for (ii=0; ii<bs; ii++,value+=stepval) {
2026                 for (jj=ii; jj<bs2; jj+=bs) {
2027                   bap[jj] = *value++;
2028                 }
2029               }
2030             }
2031           } else {
2032             if (is == ADD_VALUES) {
2033               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2034                 for (jj=0; jj<bs; jj++) {
2035                   bap[jj] += value[jj];
2036                 }
2037                 bap += bs;
2038               }
2039             } else {
2040               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2041                 for (jj=0; jj<bs; jj++) {
2042                   bap[jj]  = value[jj];
2043                 }
2044                 bap += bs;
2045               }
2046             }
2047           }
2048           goto noinsert2;
2049         }
2050       }
2051       if (nonew == 1) goto noinsert2;
2052       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2053       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2054       N = nrow++ - 1; high++;
2055       /* shift up all the later entries in this row */
2056       for (ii=N; ii>=i; ii--) {
2057         rp[ii+1] = rp[ii];
2058         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2059       }
2060       if (N >= i) {
2061         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2062       }
2063       rp[i] = col;
2064       bap   = ap +  bs2*i;
2065       if (roworiented) {
2066         for (ii=0; ii<bs; ii++,value+=stepval) {
2067           for (jj=ii; jj<bs2; jj+=bs) {
2068             bap[jj] = *value++;
2069           }
2070         }
2071       } else {
2072         for (ii=0; ii<bs; ii++,value+=stepval) {
2073           for (jj=0; jj<bs; jj++) {
2074             *bap++  = *value++;
2075           }
2076         }
2077       }
2078       noinsert2:;
2079       low = i;
2080     }
2081     ailen[row] = nrow;
2082   }
2083   PetscFunctionReturn(0);
2084 }
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2088 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2089 {
2090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2091   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2092   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2093   PetscErrorCode ierr;
2094   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2095   MatScalar      *aa = a->a,*ap;
2096   PetscReal      ratio=0.6;
2097 
2098   PetscFunctionBegin;
2099   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2100 
2101   if (m) rmax = ailen[0];
2102   for (i=1; i<mbs; i++) {
2103     /* move each row back by the amount of empty slots (fshift) before it*/
2104     fshift += imax[i-1] - ailen[i-1];
2105     rmax   = PetscMax(rmax,ailen[i]);
2106     if (fshift) {
2107       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2108       N = ailen[i];
2109       for (j=0; j<N; j++) {
2110         ip[j-fshift] = ip[j];
2111         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2112       }
2113     }
2114     ai[i] = ai[i-1] + ailen[i-1];
2115   }
2116   if (mbs) {
2117     fshift += imax[mbs-1] - ailen[mbs-1];
2118     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2119   }
2120   /* reset ilen and imax for each row */
2121   for (i=0; i<mbs; i++) {
2122     ailen[i] = imax[i] = ai[i+1] - ai[i];
2123   }
2124   a->nz = ai[mbs];
2125 
2126   /* diagonals may have moved, so kill the diagonal pointers */
2127   a->idiagvalid = PETSC_FALSE;
2128   if (fshift && a->diag) {
2129     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2130     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2131     a->diag = 0;
2132   }
2133   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2134   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2135   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2136   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2137   A->info.mallocs     += a->reallocs;
2138   a->reallocs          = 0;
2139   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2140 
2141   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2142   A->same_nonzero = PETSC_TRUE;
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 /*
2147    This function returns an array of flags which indicate the locations of contiguous
2148    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2149    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2150    Assume: sizes should be long enough to hold all the values.
2151 */
2152 #undef __FUNCT__
2153 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2154 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2155 {
2156   PetscInt   i,j,k,row;
2157   PetscBool  flg;
2158 
2159   PetscFunctionBegin;
2160   for (i=0,j=0; i<n; j++) {
2161     row = idx[i];
2162     if (row%bs!=0) { /* Not the begining of a block */
2163       sizes[j] = 1;
2164       i++;
2165     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2166       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2167       i++;
2168     } else { /* Begining of the block, so check if the complete block exists */
2169       flg = PETSC_TRUE;
2170       for (k=1; k<bs; k++) {
2171         if (row+k != idx[i+k]) { /* break in the block */
2172           flg = PETSC_FALSE;
2173           break;
2174         }
2175       }
2176       if (flg) { /* No break in the bs */
2177         sizes[j] = bs;
2178         i+= bs;
2179       } else {
2180         sizes[j] = 1;
2181         i++;
2182       }
2183     }
2184   }
2185   *bs_max = j;
2186   PetscFunctionReturn(0);
2187 }
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2191 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2192 {
2193   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2194   PetscErrorCode    ierr;
2195   PetscInt          i,j,k,count,*rows;
2196   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2197   PetscScalar       zero = 0.0;
2198   MatScalar         *aa;
2199   const PetscScalar *xx;
2200   PetscScalar       *bb;
2201 
2202   PetscFunctionBegin;
2203   /* fix right hand side if needed */
2204   if (x && b) {
2205     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2206     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2207     for (i=0; i<is_n; i++) {
2208       bb[is_idx[i]] = diag*xx[is_idx[i]];
2209     }
2210     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2211     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2212   }
2213 
2214   /* Make a copy of the IS and  sort it */
2215   /* allocate memory for rows,sizes */
2216   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2217 
2218   /* copy IS values to rows, and sort them */
2219   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2220   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2221 
2222   if (baij->keepnonzeropattern) {
2223     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2224     bs_max = is_n;
2225     A->same_nonzero = PETSC_TRUE;
2226   } else {
2227     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2228     A->same_nonzero = PETSC_FALSE;
2229   }
2230 
2231   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2232     row   = rows[j];
2233     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2234     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2235     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2236     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2237       if (diag != (PetscScalar)0.0) {
2238         if (baij->ilen[row/bs] > 0) {
2239           baij->ilen[row/bs]       = 1;
2240           baij->j[baij->i[row/bs]] = row/bs;
2241           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2242         }
2243         /* Now insert all the diagonal values for this bs */
2244         for (k=0; k<bs; k++) {
2245           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2246         }
2247       } else { /* (diag == 0.0) */
2248         baij->ilen[row/bs] = 0;
2249       } /* end (diag == 0.0) */
2250     } else { /* (sizes[i] != bs) */
2251 #if defined (PETSC_USE_DEBUG)
2252       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2253 #endif
2254       for (k=0; k<count; k++) {
2255         aa[0] =  zero;
2256         aa    += bs;
2257       }
2258       if (diag != (PetscScalar)0.0) {
2259         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2260       }
2261     }
2262   }
2263 
2264   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2265   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266   PetscFunctionReturn(0);
2267 }
2268 
2269 #undef __FUNCT__
2270 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2271 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2272 {
2273   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2274   PetscErrorCode    ierr;
2275   PetscInt          i,j,k,count;
2276   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2277   PetscScalar       zero = 0.0;
2278   MatScalar         *aa;
2279   const PetscScalar *xx;
2280   PetscScalar       *bb;
2281   PetscBool         *zeroed,vecs = PETSC_FALSE;
2282 
2283   PetscFunctionBegin;
2284   /* fix right hand side if needed */
2285   if (x && b) {
2286     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2287     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2288     vecs = PETSC_TRUE;
2289   }
2290   A->same_nonzero = PETSC_TRUE;
2291 
2292   /* zero the columns */
2293   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2294   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2295   for (i=0; i<is_n; i++) {
2296     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2297     zeroed[is_idx[i]] = PETSC_TRUE;
2298   }
2299   for (i=0; i<A->rmap->N; i++) {
2300     if (!zeroed[i]) {
2301       row = i/bs;
2302       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2303         for (k=0; k<bs; k++) {
2304           col = bs*baij->j[j] + k;
2305 	  if (zeroed[col]) {
2306 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2307             if (vecs) bb[i] -= aa[0]*xx[col];
2308             aa[0] = 0.0;
2309           }
2310         }
2311       }
2312     } else if (vecs) bb[i] = diag*xx[i];
2313   }
2314   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2315   if (vecs) {
2316     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2317     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2318   }
2319 
2320   /* zero the rows */
2321   for (i=0; i<is_n; i++) {
2322     row   = is_idx[i];
2323     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2324     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2325     for (k=0; k<count; k++) {
2326       aa[0] =  zero;
2327       aa    += bs;
2328     }
2329     if (diag != (PetscScalar)0.0) {
2330       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2331     }
2332   }
2333   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2339 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2340 {
2341   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2342   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2343   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2344   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2345   PetscErrorCode ierr;
2346   PetscInt       ridx,cidx,bs2=a->bs2;
2347   PetscBool      roworiented=a->roworiented;
2348   MatScalar      *ap,value,*aa=a->a,*bap;
2349 
2350   PetscFunctionBegin;
2351   if (v) PetscValidScalarPointer(v,6);
2352   for (k=0; k<m; k++) { /* loop over added rows */
2353     row  = im[k];
2354     brow = row/bs;
2355     if (row < 0) continue;
2356 #if defined(PETSC_USE_DEBUG)
2357     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2358 #endif
2359     rp   = aj + ai[brow];
2360     ap   = aa + bs2*ai[brow];
2361     rmax = imax[brow];
2362     nrow = ailen[brow];
2363     low  = 0;
2364     high = nrow;
2365     for (l=0; l<n; l++) { /* loop over added columns */
2366       if (in[l] < 0) continue;
2367 #if defined(PETSC_USE_DEBUG)
2368       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2369 #endif
2370       col = in[l]; bcol = col/bs;
2371       ridx = row % bs; cidx = col % bs;
2372       if (roworiented) {
2373         value = v[l + k*n];
2374       } else {
2375         value = v[k + l*m];
2376       }
2377       if (col <= lastcol) low = 0; else high = nrow;
2378       lastcol = col;
2379       while (high-low > 7) {
2380         t = (low+high)/2;
2381         if (rp[t] > bcol) high = t;
2382         else              low  = t;
2383       }
2384       for (i=low; i<high; i++) {
2385         if (rp[i] > bcol) break;
2386         if (rp[i] == bcol) {
2387           bap  = ap +  bs2*i + bs*cidx + ridx;
2388           if (is == ADD_VALUES) *bap += value;
2389           else                  *bap  = value;
2390           goto noinsert1;
2391         }
2392       }
2393       if (nonew == 1) goto noinsert1;
2394       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2395       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2396       N = nrow++ - 1; high++;
2397       /* shift up all the later entries in this row */
2398       for (ii=N; ii>=i; ii--) {
2399         rp[ii+1] = rp[ii];
2400         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2401       }
2402       if (N>=i) {
2403         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2404       }
2405       rp[i]                      = bcol;
2406       ap[bs2*i + bs*cidx + ridx] = value;
2407       a->nz++;
2408       noinsert1:;
2409       low = i;
2410     }
2411     ailen[brow] = nrow;
2412   }
2413   A->same_nonzero = PETSC_FALSE;
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2419 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2420 {
2421   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2422   Mat            outA;
2423   PetscErrorCode ierr;
2424   PetscBool      row_identity,col_identity;
2425 
2426   PetscFunctionBegin;
2427   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2428   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2429   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2430   if (!row_identity || !col_identity) {
2431     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2432   }
2433 
2434   outA            = inA;
2435   inA->factortype = MAT_FACTOR_LU;
2436 
2437   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2438 
2439   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2441   a->row = row;
2442   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2443   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2444   a->col = col;
2445 
2446   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2447   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2448    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2449   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2450 
2451   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2452   if (!a->solve_work) {
2453     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2454     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2455   }
2456   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2457 
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 EXTERN_C_BEGIN
2462 #undef __FUNCT__
2463 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2464 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2465 {
2466   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2467   PetscInt    i,nz,mbs;
2468 
2469   PetscFunctionBegin;
2470   nz  = baij->maxnz;
2471   mbs = baij->mbs;
2472   for (i=0; i<nz; i++) {
2473     baij->j[i] = indices[i];
2474   }
2475   baij->nz = nz;
2476   for (i=0; i<mbs; i++) {
2477     baij->ilen[i] = baij->imax[i];
2478   }
2479   PetscFunctionReturn(0);
2480 }
2481 EXTERN_C_END
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2485 /*@
2486     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2487        in the matrix.
2488 
2489   Input Parameters:
2490 +  mat - the SeqBAIJ matrix
2491 -  indices - the column indices
2492 
2493   Level: advanced
2494 
2495   Notes:
2496     This can be called if you have precomputed the nonzero structure of the
2497   matrix and want to provide it to the matrix object to improve the performance
2498   of the MatSetValues() operation.
2499 
2500     You MUST have set the correct numbers of nonzeros per row in the call to
2501   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2502 
2503     MUST be called before any calls to MatSetValues();
2504 
2505 @*/
2506 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2512   PetscValidPointer(indices,2);
2513   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 #undef __FUNCT__
2518 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2519 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2520 {
2521   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2522   PetscErrorCode ierr;
2523   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2524   PetscReal      atmp;
2525   PetscScalar    *x,zero = 0.0;
2526   MatScalar      *aa;
2527   PetscInt       ncols,brow,krow,kcol;
2528 
2529   PetscFunctionBegin;
2530   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2531   bs   = A->rmap->bs;
2532   aa   = a->a;
2533   ai   = a->i;
2534   aj   = a->j;
2535   mbs  = a->mbs;
2536 
2537   ierr = VecSet(v,zero);CHKERRQ(ierr);
2538   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2539   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2540   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2541   for (i=0; i<mbs; i++) {
2542     ncols = ai[1] - ai[0]; ai++;
2543     brow  = bs*i;
2544     for (j=0; j<ncols; j++){
2545       for (kcol=0; kcol<bs; kcol++){
2546         for (krow=0; krow<bs; krow++){
2547           atmp = PetscAbsScalar(*aa);aa++;
2548           row = brow + krow;    /* row index */
2549           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2550           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2551         }
2552       }
2553       aj++;
2554     }
2555   }
2556   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatCopy_SeqBAIJ"
2562 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2563 {
2564   PetscErrorCode ierr;
2565 
2566   PetscFunctionBegin;
2567   /* If the two matrices have the same copy implementation, use fast copy. */
2568   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2569     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2570     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2571     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2572 
2573     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2574     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2575     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2576   } else {
2577     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 #undef __FUNCT__
2583 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2584 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2585 {
2586   PetscErrorCode ierr;
2587 
2588   PetscFunctionBegin;
2589   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 #undef __FUNCT__
2594 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2595 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2596 {
2597   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2598   PetscFunctionBegin;
2599   *array = a->a;
2600   PetscFunctionReturn(0);
2601 }
2602 
2603 #undef __FUNCT__
2604 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2605 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2606 {
2607   PetscFunctionBegin;
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 #undef __FUNCT__
2612 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2613 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2614 {
2615   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2616   PetscErrorCode ierr;
2617   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2618   PetscBLASInt   one=1;
2619 
2620   PetscFunctionBegin;
2621   if (str == SAME_NONZERO_PATTERN) {
2622     PetscScalar alpha = a;
2623     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2624     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2625   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2626     if (y->xtoy && y->XtoY != X) {
2627       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2628       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2629     }
2630     if (!y->xtoy) { /* get xtoy */
2631       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2632       y->XtoY = X;
2633       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2634     }
2635     for (i=0; i<x->nz; i++) {
2636       j = 0;
2637       while (j < bs2){
2638         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2639         j++;
2640       }
2641     }
2642     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2643   } else {
2644     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2645   }
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2651 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2652 {
2653   PetscInt rbs,cbs;
2654   PetscErrorCode ierr;
2655 
2656   PetscFunctionBegin;
2657   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2658   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2659   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2660   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2666 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2667 {
2668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2669   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2670   MatScalar      *aa = a->a;
2671 
2672   PetscFunctionBegin;
2673   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2679 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2680 {
2681   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2682   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2683   MatScalar      *aa = a->a;
2684 
2685   PetscFunctionBegin;
2686   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2687   PetscFunctionReturn(0);
2688 }
2689 
2690 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2694 /*
2695     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2696 */
2697 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2698 {
2699   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2700   PetscErrorCode ierr;
2701   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2702   PetscInt       nz = a->i[m],row,*jj,mr,col;
2703 
2704   PetscFunctionBegin;
2705   *nn = n;
2706   if (!ia) PetscFunctionReturn(0);
2707   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2708   else {
2709     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2710     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2711     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2712     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2713     jj = a->j;
2714     for (i=0; i<nz; i++) {
2715       collengths[jj[i]]++;
2716     }
2717     cia[0] = oshift;
2718     for (i=0; i<n; i++) {
2719       cia[i+1] = cia[i] + collengths[i];
2720     }
2721     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2722     jj   = a->j;
2723     for (row=0; row<m; row++) {
2724       mr = a->i[row+1] - a->i[row];
2725       for (i=0; i<mr; i++) {
2726         col = *jj++;
2727         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2728       }
2729     }
2730     ierr = PetscFree(collengths);CHKERRQ(ierr);
2731     *ia = cia; *ja = cja;
2732   }
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2738 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2739 {
2740   PetscErrorCode ierr;
2741 
2742   PetscFunctionBegin;
2743   if (!ia) PetscFunctionReturn(0);
2744   ierr = PetscFree(*ia);CHKERRQ(ierr);
2745   ierr = PetscFree(*ja);CHKERRQ(ierr);
2746   PetscFunctionReturn(0);
2747 }
2748 
2749 #undef __FUNCT__
2750 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2751 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2752 {
2753   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2754   PetscErrorCode ierr;
2755   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2756   PetscScalar    dx,*y,*xx,*w3_array;
2757   PetscScalar    *vscale_array;
2758   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2759   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2760   void           *fctx = coloring->fctx;
2761   PetscBool      flg = PETSC_FALSE;
2762   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2763   Vec            x1_tmp;
2764 
2765   PetscFunctionBegin;
2766   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2767   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2768   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2769   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2770 
2771   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2772   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2773   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2774   if (flg) {
2775     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2776   } else {
2777     PetscBool  assembled;
2778     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2779     if (assembled) {
2780       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2781     }
2782   }
2783 
2784   x1_tmp = x1;
2785   if (!coloring->vscale){
2786     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2787   }
2788 
2789   /*
2790     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2791     coloring->F for the coarser grids from the finest
2792   */
2793   if (coloring->F) {
2794     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2795     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2796     if (m1 != m2) {
2797       coloring->F = 0;
2798       }
2799     }
2800 
2801   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2802     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2803   }
2804   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2805 
2806   /* Set w1 = F(x1) */
2807   if (coloring->F) {
2808     w1          = coloring->F; /* use already computed value of function */
2809     coloring->F = 0;
2810   } else {
2811     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2812     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2813     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2814   }
2815 
2816   if (!coloring->w3) {
2817     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2818     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2819   }
2820   w3 = coloring->w3;
2821 
2822     CHKMEMQ;
2823     /* Compute all the local scale factors, including ghost points */
2824   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2825   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2826   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2827   if (ctype == IS_COLORING_GHOSTED){
2828     col_start = 0; col_end = N;
2829   } else if (ctype == IS_COLORING_GLOBAL){
2830     xx = xx - start;
2831     vscale_array = vscale_array - start;
2832     col_start = start; col_end = N + start;
2833   }    CHKMEMQ;
2834   for (col=col_start; col<col_end; col++){
2835     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2836     if (coloring->htype[0] == 'w') {
2837       dx = 1.0 + unorm;
2838     } else {
2839       dx  = xx[col];
2840     }
2841     if (dx == (PetscScalar)0.0) dx = 1.0;
2842 #if !defined(PETSC_USE_COMPLEX)
2843     if (dx < umin && dx >= 0.0)      dx = umin;
2844     else if (dx < 0.0 && dx > -umin) dx = -umin;
2845 #else
2846     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2847     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2848 #endif
2849     dx               *= epsilon;
2850     vscale_array[col] = (PetscScalar)1.0/dx;
2851   }     CHKMEMQ;
2852   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2853   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2854   if (ctype == IS_COLORING_GLOBAL){
2855     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2856     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2857   }
2858   CHKMEMQ;
2859   if (coloring->vscaleforrow) {
2860     vscaleforrow = coloring->vscaleforrow;
2861   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2862 
2863   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2864   /*
2865     Loop over each color
2866   */
2867   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2868   for (k=0; k<coloring->ncolors; k++) {
2869     coloring->currentcolor = k;
2870     for (i=0; i<bs; i++) {
2871       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2872       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2873       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2874       /*
2875 	Loop over each column associated with color
2876 	adding the perturbation to the vector w3.
2877       */
2878       for (l=0; l<coloring->ncolumns[k]; l++) {
2879 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2880 	if (coloring->htype[0] == 'w') {
2881 	  dx = 1.0 + unorm;
2882 	} else {
2883 	  dx  = xx[col];
2884 	}
2885 	if (dx == (PetscScalar)0.0) dx = 1.0;
2886 #if !defined(PETSC_USE_COMPLEX)
2887 	if (dx < umin && dx >= 0.0)      dx = umin;
2888 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2889 #else
2890 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2891 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2892 #endif
2893 	dx            *= epsilon;
2894 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2895 	w3_array[col] += dx;
2896       }
2897       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2898       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2899 
2900       /*
2901 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2902 	w2 = F(x1 + dx) - F(x1)
2903       */
2904       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2905       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2906       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2907       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2908 
2909       /*
2910 	Loop over rows of vector, putting results into Jacobian matrix
2911       */
2912       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2913       for (l=0; l<coloring->nrows[k]; l++) {
2914 	row    = bs*coloring->rows[k][l];             /* local row index */
2915 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2916         for (j=0; j<bs; j++) {
2917   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2918           srows[j]  = row + start + j;
2919         }
2920 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2921       }
2922       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2923     }
2924   } /* endof for each color */
2925   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2926   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2927   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2928   ierr = PetscFree(srows);CHKERRQ(ierr);
2929 
2930   coloring->currentcolor = -1;
2931   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2932   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2933   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2934   PetscFunctionReturn(0);
2935 }
2936 
2937 /* -------------------------------------------------------------------*/
2938 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2939        MatGetRow_SeqBAIJ,
2940        MatRestoreRow_SeqBAIJ,
2941        MatMult_SeqBAIJ_N,
2942 /* 4*/ MatMultAdd_SeqBAIJ_N,
2943        MatMultTranspose_SeqBAIJ,
2944        MatMultTransposeAdd_SeqBAIJ,
2945        0,
2946        0,
2947        0,
2948 /*10*/ 0,
2949        MatLUFactor_SeqBAIJ,
2950        0,
2951        0,
2952        MatTranspose_SeqBAIJ,
2953 /*15*/ MatGetInfo_SeqBAIJ,
2954        MatEqual_SeqBAIJ,
2955        MatGetDiagonal_SeqBAIJ,
2956        MatDiagonalScale_SeqBAIJ,
2957        MatNorm_SeqBAIJ,
2958 /*20*/ 0,
2959        MatAssemblyEnd_SeqBAIJ,
2960        MatSetOption_SeqBAIJ,
2961        MatZeroEntries_SeqBAIJ,
2962 /*24*/ MatZeroRows_SeqBAIJ,
2963        0,
2964        0,
2965        0,
2966        0,
2967 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2968        0,
2969        0,
2970        MatGetArray_SeqBAIJ,
2971        MatRestoreArray_SeqBAIJ,
2972 /*34*/ MatDuplicate_SeqBAIJ,
2973        0,
2974        0,
2975        MatILUFactor_SeqBAIJ,
2976        0,
2977 /*39*/ MatAXPY_SeqBAIJ,
2978        MatGetSubMatrices_SeqBAIJ,
2979        MatIncreaseOverlap_SeqBAIJ,
2980        MatGetValues_SeqBAIJ,
2981        MatCopy_SeqBAIJ,
2982 /*44*/ 0,
2983        MatScale_SeqBAIJ,
2984        0,
2985        0,
2986        MatZeroRowsColumns_SeqBAIJ,
2987 /*49*/ MatSetBlockSize_SeqBAIJ,
2988        MatGetRowIJ_SeqBAIJ,
2989        MatRestoreRowIJ_SeqBAIJ,
2990        MatGetColumnIJ_SeqBAIJ,
2991        MatRestoreColumnIJ_SeqBAIJ,
2992 /*54*/ MatFDColoringCreate_SeqAIJ,
2993        0,
2994        0,
2995        0,
2996        MatSetValuesBlocked_SeqBAIJ,
2997 /*59*/ MatGetSubMatrix_SeqBAIJ,
2998        MatDestroy_SeqBAIJ,
2999        MatView_SeqBAIJ,
3000        0,
3001        0,
3002 /*64*/ 0,
3003        0,
3004        0,
3005        0,
3006        0,
3007 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3008        0,
3009        MatConvert_Basic,
3010        0,
3011        0,
3012 /*74*/ 0,
3013        MatFDColoringApply_BAIJ,
3014        0,
3015        0,
3016        0,
3017 /*79*/ 0,
3018        0,
3019        0,
3020        0,
3021        MatLoad_SeqBAIJ,
3022 /*84*/ 0,
3023        0,
3024        0,
3025        0,
3026        0,
3027 /*89*/ 0,
3028        0,
3029        0,
3030        0,
3031        0,
3032 /*94*/ 0,
3033        0,
3034        0,
3035        0,
3036        0,
3037 /*99*/0,
3038        0,
3039        0,
3040        0,
3041        0,
3042 /*104*/0,
3043        MatRealPart_SeqBAIJ,
3044        MatImaginaryPart_SeqBAIJ,
3045        0,
3046        0,
3047 /*109*/0,
3048        0,
3049        0,
3050        0,
3051        MatMissingDiagonal_SeqBAIJ,
3052 /*114*/0,
3053        0,
3054        0,
3055        0,
3056        0,
3057 /*119*/0,
3058        0,
3059        MatMultHermitianTranspose_SeqBAIJ,
3060        MatMultHermitianTransposeAdd_SeqBAIJ,
3061        0,
3062 /*124*/0,
3063        0,
3064        MatInvertBlockDiagonal_SeqBAIJ
3065 };
3066 
3067 EXTERN_C_BEGIN
3068 #undef __FUNCT__
3069 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3070 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3071 {
3072   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3073   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3074   PetscErrorCode ierr;
3075 
3076   PetscFunctionBegin;
3077   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3078 
3079   /* allocate space for values if not already there */
3080   if (!aij->saved_values) {
3081     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3082     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3083   }
3084 
3085   /* copy values over */
3086   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3087   PetscFunctionReturn(0);
3088 }
3089 EXTERN_C_END
3090 
3091 EXTERN_C_BEGIN
3092 #undef __FUNCT__
3093 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3094 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3095 {
3096   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3097   PetscErrorCode ierr;
3098   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3099 
3100   PetscFunctionBegin;
3101   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3102   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3103 
3104   /* copy values over */
3105   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3106   PetscFunctionReturn(0);
3107 }
3108 EXTERN_C_END
3109 
3110 EXTERN_C_BEGIN
3111 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3112 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3113 EXTERN_C_END
3114 
3115 EXTERN_C_BEGIN
3116 #undef __FUNCT__
3117 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3118 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3119 {
3120   Mat_SeqBAIJ    *b;
3121   PetscErrorCode ierr;
3122   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3123   PetscBool      flg,skipallocation = PETSC_FALSE;
3124 
3125   PetscFunctionBegin;
3126 
3127   if (nz == MAT_SKIP_ALLOCATION) {
3128     skipallocation = PETSC_TRUE;
3129     nz             = 0;
3130   }
3131 
3132   if (bs < 0) {
3133     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3134       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3135     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3136     bs   = PetscAbs(bs);
3137   }
3138   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3139   bs   = newbs;
3140 
3141   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3142   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3143   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3144   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3145 
3146   B->preallocated = PETSC_TRUE;
3147 
3148   mbs  = B->rmap->n/bs;
3149   nbs  = B->cmap->n/bs;
3150   bs2  = bs*bs;
3151 
3152   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3153 
3154   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3155   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3156   if (nnz) {
3157     for (i=0; i<mbs; i++) {
3158       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3159       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3160     }
3161   }
3162 
3163   b       = (Mat_SeqBAIJ*)B->data;
3164   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3165     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3166   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3167 
3168   if (!flg) {
3169     switch (bs) {
3170     case 1:
3171       B->ops->mult            = MatMult_SeqBAIJ_1;
3172       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3173       B->ops->sor             = MatSOR_SeqBAIJ_1;
3174       break;
3175     case 2:
3176       B->ops->mult            = MatMult_SeqBAIJ_2;
3177       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3178       B->ops->sor             = MatSOR_SeqBAIJ_2;
3179       break;
3180     case 3:
3181       B->ops->mult            = MatMult_SeqBAIJ_3;
3182       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3183       B->ops->sor             = MatSOR_SeqBAIJ_3;
3184       break;
3185     case 4:
3186       B->ops->mult            = MatMult_SeqBAIJ_4;
3187       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3188       B->ops->sor             = MatSOR_SeqBAIJ_4;
3189       break;
3190     case 5:
3191       B->ops->mult            = MatMult_SeqBAIJ_5;
3192       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3193       B->ops->sor             = MatSOR_SeqBAIJ_5;
3194       break;
3195     case 6:
3196       B->ops->mult            = MatMult_SeqBAIJ_6;
3197       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3198       B->ops->sor             = MatSOR_SeqBAIJ_6;
3199       break;
3200     case 7:
3201       B->ops->mult            = MatMult_SeqBAIJ_7;
3202       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3203       B->ops->sor             = MatSOR_SeqBAIJ_7;
3204       break;
3205     case 15:
3206       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3207       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3208       B->ops->sor             = MatSOR_SeqBAIJ_N;
3209       break;
3210     default:
3211       B->ops->mult            = MatMult_SeqBAIJ_N;
3212       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3213       B->ops->sor             = MatSOR_SeqBAIJ_N;
3214       break;
3215     }
3216   }
3217   B->rmap->bs  = bs;
3218   b->mbs       = mbs;
3219   b->nbs       = nbs;
3220   if (!skipallocation) {
3221     if (!b->imax) {
3222       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3223       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3224       b->free_imax_ilen = PETSC_TRUE;
3225     }
3226     /* b->ilen will count nonzeros in each block row so far. */
3227     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3228     if (!nnz) {
3229       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3230       else if (nz < 0) nz = 1;
3231       for (i=0; i<mbs; i++) b->imax[i] = nz;
3232       nz = nz*mbs;
3233     } else {
3234       nz = 0;
3235       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3236     }
3237 
3238     /* allocate the matrix space */
3239     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3240     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3241     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3242     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3243     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3244     b->singlemalloc = PETSC_TRUE;
3245     b->i[0] = 0;
3246     for (i=1; i<mbs+1; i++) {
3247       b->i[i] = b->i[i-1] + b->imax[i-1];
3248     }
3249     b->free_a     = PETSC_TRUE;
3250     b->free_ij    = PETSC_TRUE;
3251   } else {
3252     b->free_a     = PETSC_FALSE;
3253     b->free_ij    = PETSC_FALSE;
3254   }
3255 
3256   B->rmap->bs          = bs;
3257   b->bs2              = bs2;
3258   b->mbs              = mbs;
3259   b->nz               = 0;
3260   b->maxnz            = nz;
3261   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3262   PetscFunctionReturn(0);
3263 }
3264 EXTERN_C_END
3265 
3266 EXTERN_C_BEGIN
3267 #undef __FUNCT__
3268 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3269 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3270 {
3271   PetscInt       i,m,nz,nz_max=0,*nnz;
3272   PetscScalar    *values=0;
3273   PetscErrorCode ierr;
3274 
3275   PetscFunctionBegin;
3276   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3277   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3278   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3279   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3280   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3281   m = B->rmap->n/bs;
3282 
3283   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3284   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3285   for(i=0; i<m; i++) {
3286     nz = ii[i+1]- ii[i];
3287     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3288     nz_max = PetscMax(nz_max, nz);
3289     nnz[i] = nz;
3290   }
3291   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3292   ierr = PetscFree(nnz);CHKERRQ(ierr);
3293 
3294   values = (PetscScalar*)V;
3295   if (!values) {
3296     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3297     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3298   }
3299   for (i=0; i<m; i++) {
3300     PetscInt          ncols  = ii[i+1] - ii[i];
3301     const PetscInt    *icols = jj + ii[i];
3302     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3303     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3304   }
3305   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3306   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3307   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3308 
3309   PetscFunctionReturn(0);
3310 }
3311 EXTERN_C_END
3312 
3313 
3314 EXTERN_C_BEGIN
3315 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3316 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3317 #if defined(PETSC_HAVE_MUMPS)
3318 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3319 #endif
3320 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3321 EXTERN_C_END
3322 
3323 /*MC
3324    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3325    block sparse compressed row format.
3326 
3327    Options Database Keys:
3328 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3329 
3330   Level: beginner
3331 
3332 .seealso: MatCreateSeqBAIJ()
3333 M*/
3334 
3335 EXTERN_C_BEGIN
3336 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3337 EXTERN_C_END
3338 
3339 EXTERN_C_BEGIN
3340 #undef __FUNCT__
3341 #define __FUNCT__ "MatCreate_SeqBAIJ"
3342 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3343 {
3344   PetscErrorCode ierr;
3345   PetscMPIInt    size;
3346   Mat_SeqBAIJ    *b;
3347 
3348   PetscFunctionBegin;
3349   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3350   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3351 
3352   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3353   B->data = (void*)b;
3354   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3355   b->row                   = 0;
3356   b->col                   = 0;
3357   b->icol                  = 0;
3358   b->reallocs              = 0;
3359   b->saved_values          = 0;
3360 
3361   b->roworiented           = PETSC_TRUE;
3362   b->nonew                 = 0;
3363   b->diag                  = 0;
3364   b->solve_work            = 0;
3365   b->mult_work             = 0;
3366   B->spptr                 = 0;
3367   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3368   b->keepnonzeropattern    = PETSC_FALSE;
3369   b->xtoy                  = 0;
3370   b->XtoY                  = 0;
3371   B->same_nonzero          = PETSC_FALSE;
3372 
3373   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3374                                      "MatGetFactorAvailable_seqbaij_petsc",
3375                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3376   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3377                                      "MatGetFactor_seqbaij_petsc",
3378                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3379   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3380                                      "MatGetFactor_seqbaij_bstrm",
3381                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3382 #if defined(PETSC_HAVE_MUMPS)
3383   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3384 #endif
3385   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3386                                      "MatInvertBlockDiagonal_SeqBAIJ",
3387                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3388   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3389                                      "MatStoreValues_SeqBAIJ",
3390                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3391   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3392                                      "MatRetrieveValues_SeqBAIJ",
3393                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3395                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3396                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3398                                      "MatConvert_SeqBAIJ_SeqAIJ",
3399                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3401                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3402                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3404                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3405                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3407                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3408                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3410                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3411                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3413                                      "MatIsTranspose_SeqBAIJ",
3414                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3415   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
3418 EXTERN_C_END
3419 
3420 #undef __FUNCT__
3421 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3422 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3423 {
3424   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3425   PetscErrorCode ierr;
3426   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3427 
3428   PetscFunctionBegin;
3429   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3430 
3431   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3432     c->imax = a->imax;
3433     c->ilen = a->ilen;
3434     c->free_imax_ilen = PETSC_FALSE;
3435   } else {
3436     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3437     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3438     for (i=0; i<mbs; i++) {
3439       c->imax[i] = a->imax[i];
3440       c->ilen[i] = a->ilen[i];
3441     }
3442     c->free_imax_ilen = PETSC_TRUE;
3443   }
3444 
3445   /* allocate the matrix space */
3446   if (mallocmatspace){
3447     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3448       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3449       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3450       c->singlemalloc = PETSC_FALSE;
3451       c->free_ij      = PETSC_FALSE;
3452       c->i            = a->i;
3453       c->j            = a->j;
3454       c->parent       = A;
3455       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3456       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3457       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3458     } else {
3459       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3460       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3461       c->singlemalloc = PETSC_TRUE;
3462       c->free_ij      = PETSC_TRUE;
3463       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3464       if (mbs > 0) {
3465 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3466 	if (cpvalues == MAT_COPY_VALUES) {
3467 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3468 	} else {
3469 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3470 	}
3471       }
3472     }
3473   }
3474 
3475   c->roworiented = a->roworiented;
3476   c->nonew       = a->nonew;
3477   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3478   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3479   c->bs2         = a->bs2;
3480   c->mbs         = a->mbs;
3481   c->nbs         = a->nbs;
3482 
3483   if (a->diag) {
3484     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3485       c->diag      = a->diag;
3486       c->free_diag = PETSC_FALSE;
3487     } else {
3488       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3489       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3490       for (i=0; i<mbs; i++) {
3491         c->diag[i] = a->diag[i];
3492       }
3493       c->free_diag = PETSC_TRUE;
3494     }
3495   } else c->diag        = 0;
3496   c->nz                 = a->nz;
3497   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3498   c->solve_work         = 0;
3499   c->mult_work          = 0;
3500   c->free_a             = PETSC_TRUE;
3501   c->free_ij            = PETSC_TRUE;
3502   C->preallocated       = PETSC_TRUE;
3503   C->assembled          = PETSC_TRUE;
3504 
3505   c->compressedrow.use     = a->compressedrow.use;
3506   c->compressedrow.nrows   = a->compressedrow.nrows;
3507   c->compressedrow.check   = a->compressedrow.check;
3508   if (a->compressedrow.use){
3509     i = a->compressedrow.nrows;
3510     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3511     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3512     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3513     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3514   } else {
3515     c->compressedrow.use    = PETSC_FALSE;
3516     c->compressedrow.i      = PETSC_NULL;
3517     c->compressedrow.rindex = PETSC_NULL;
3518   }
3519   C->same_nonzero = A->same_nonzero;
3520   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3521   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3522   PetscFunctionReturn(0);
3523 }
3524 
3525 #undef __FUNCT__
3526 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3527 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3528 {
3529     PetscErrorCode ierr;
3530 
3531   PetscFunctionBegin;
3532   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3533   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3534   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3535   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3536   PetscFunctionReturn(0);
3537 }
3538 
3539 #undef __FUNCT__
3540 #define __FUNCT__ "MatLoad_SeqBAIJ"
3541 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3542 {
3543   Mat_SeqBAIJ    *a;
3544   PetscErrorCode ierr;
3545   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3546   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3547   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3548   PetscInt       *masked,nmask,tmp,bs2,ishift;
3549   PetscMPIInt    size;
3550   int            fd;
3551   PetscScalar    *aa;
3552   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3553 
3554   PetscFunctionBegin;
3555   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3556   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3557   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3558   bs2  = bs*bs;
3559 
3560   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3561   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3562   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3563   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3564   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3565   M = header[1]; N = header[2]; nz = header[3];
3566 
3567   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3568   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3569 
3570   /*
3571      This code adds extra rows to make sure the number of rows is
3572     divisible by the blocksize
3573   */
3574   mbs        = M/bs;
3575   extra_rows = bs - M + bs*(mbs);
3576   if (extra_rows == bs) extra_rows = 0;
3577   else                  mbs++;
3578   if (extra_rows) {
3579     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3580   }
3581 
3582   /* Set global sizes if not already set */
3583   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3584     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3585   } else { /* Check if the matrix global sizes are correct */
3586     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3587     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3588       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3589     }
3590     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3591   }
3592 
3593   /* read in row lengths */
3594   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3595   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3596   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3597 
3598   /* read in column indices */
3599   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3600   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3601   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3602 
3603   /* loop over row lengths determining block row lengths */
3604   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3605   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3606   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3607   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3608   rowcount = 0;
3609   nzcount = 0;
3610   for (i=0; i<mbs; i++) {
3611     nmask = 0;
3612     for (j=0; j<bs; j++) {
3613       kmax = rowlengths[rowcount];
3614       for (k=0; k<kmax; k++) {
3615         tmp = jj[nzcount++]/bs;
3616         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3617       }
3618       rowcount++;
3619     }
3620     browlengths[i] += nmask;
3621     /* zero out the mask elements we set */
3622     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3623   }
3624 
3625   /* Do preallocation  */
3626   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3627   a = (Mat_SeqBAIJ*)newmat->data;
3628 
3629   /* set matrix "i" values */
3630   a->i[0] = 0;
3631   for (i=1; i<= mbs; i++) {
3632     a->i[i]      = a->i[i-1] + browlengths[i-1];
3633     a->ilen[i-1] = browlengths[i-1];
3634   }
3635   a->nz         = 0;
3636   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3637 
3638   /* read in nonzero values */
3639   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3640   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3641   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3642 
3643   /* set "a" and "j" values into matrix */
3644   nzcount = 0; jcount = 0;
3645   for (i=0; i<mbs; i++) {
3646     nzcountb = nzcount;
3647     nmask    = 0;
3648     for (j=0; j<bs; j++) {
3649       kmax = rowlengths[i*bs+j];
3650       for (k=0; k<kmax; k++) {
3651         tmp = jj[nzcount++]/bs;
3652 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3653       }
3654     }
3655     /* sort the masked values */
3656     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3657 
3658     /* set "j" values into matrix */
3659     maskcount = 1;
3660     for (j=0; j<nmask; j++) {
3661       a->j[jcount++]  = masked[j];
3662       mask[masked[j]] = maskcount++;
3663     }
3664     /* set "a" values into matrix */
3665     ishift = bs2*a->i[i];
3666     for (j=0; j<bs; j++) {
3667       kmax = rowlengths[i*bs+j];
3668       for (k=0; k<kmax; k++) {
3669         tmp       = jj[nzcountb]/bs ;
3670         block     = mask[tmp] - 1;
3671         point     = jj[nzcountb] - bs*tmp;
3672         idx       = ishift + bs2*block + j + bs*point;
3673         a->a[idx] = (MatScalar)aa[nzcountb++];
3674       }
3675     }
3676     /* zero out the mask elements we set */
3677     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3678   }
3679   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3680 
3681   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3682   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3683   ierr = PetscFree(aa);CHKERRQ(ierr);
3684   ierr = PetscFree(jj);CHKERRQ(ierr);
3685   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3686 
3687   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3688   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3689   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3690   PetscFunctionReturn(0);
3691 }
3692 
3693 #undef __FUNCT__
3694 #define __FUNCT__ "MatCreateSeqBAIJ"
3695 /*@C
3696    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3697    compressed row) format.  For good matrix assembly performance the
3698    user should preallocate the matrix storage by setting the parameter nz
3699    (or the array nnz).  By setting these parameters accurately, performance
3700    during matrix assembly can be increased by more than a factor of 50.
3701 
3702    Collective on MPI_Comm
3703 
3704    Input Parameters:
3705 +  comm - MPI communicator, set to PETSC_COMM_SELF
3706 .  bs - size of block
3707 .  m - number of rows
3708 .  n - number of columns
3709 .  nz - number of nonzero blocks  per block row (same for all rows)
3710 -  nnz - array containing the number of nonzero blocks in the various block rows
3711          (possibly different for each block row) or PETSC_NULL
3712 
3713    Output Parameter:
3714 .  A - the matrix
3715 
3716    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3717    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3718    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3719 
3720    Options Database Keys:
3721 .   -mat_no_unroll - uses code that does not unroll the loops in the
3722                      block calculations (much slower)
3723 .    -mat_block_size - size of the blocks to use
3724 
3725    Level: intermediate
3726 
3727    Notes:
3728    The number of rows and columns must be divisible by blocksize.
3729 
3730    If the nnz parameter is given then the nz parameter is ignored
3731 
3732    A nonzero block is any block that as 1 or more nonzeros in it
3733 
3734    The block AIJ format is fully compatible with standard Fortran 77
3735    storage.  That is, the stored row and column indices can begin at
3736    either one (as in Fortran) or zero.  See the users' manual for details.
3737 
3738    Specify the preallocated storage with either nz or nnz (not both).
3739    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3740    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3741    matrices.
3742 
3743 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3744 @*/
3745 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3746 {
3747   PetscErrorCode ierr;
3748 
3749   PetscFunctionBegin;
3750   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3751   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3752   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3753   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3754   PetscFunctionReturn(0);
3755 }
3756 
3757 #undef __FUNCT__
3758 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3759 /*@C
3760    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3761    per row in the matrix. For good matrix assembly performance the
3762    user should preallocate the matrix storage by setting the parameter nz
3763    (or the array nnz).  By setting these parameters accurately, performance
3764    during matrix assembly can be increased by more than a factor of 50.
3765 
3766    Collective on MPI_Comm
3767 
3768    Input Parameters:
3769 +  A - the matrix
3770 .  bs - size of block
3771 .  nz - number of block nonzeros per block row (same for all rows)
3772 -  nnz - array containing the number of block nonzeros in the various block rows
3773          (possibly different for each block row) or PETSC_NULL
3774 
3775    Options Database Keys:
3776 .   -mat_no_unroll - uses code that does not unroll the loops in the
3777                      block calculations (much slower)
3778 .    -mat_block_size - size of the blocks to use
3779 
3780    Level: intermediate
3781 
3782    Notes:
3783    If the nnz parameter is given then the nz parameter is ignored
3784 
3785    You can call MatGetInfo() to get information on how effective the preallocation was;
3786    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3787    You can also run with the option -info and look for messages with the string
3788    malloc in them to see if additional memory allocation was needed.
3789 
3790    The block AIJ format is fully compatible with standard Fortran 77
3791    storage.  That is, the stored row and column indices can begin at
3792    either one (as in Fortran) or zero.  See the users' manual for details.
3793 
3794    Specify the preallocated storage with either nz or nnz (not both).
3795    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3796    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3797 
3798 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3799 @*/
3800 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3801 {
3802   PetscErrorCode ierr;
3803 
3804   PetscFunctionBegin;
3805   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3806   PetscFunctionReturn(0);
3807 }
3808 
3809 #undef __FUNCT__
3810 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3811 /*@C
3812    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3813    (the default sequential PETSc format).
3814 
3815    Collective on MPI_Comm
3816 
3817    Input Parameters:
3818 +  A - the matrix
3819 .  i - the indices into j for the start of each local row (starts with zero)
3820 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3821 -  v - optional values in the matrix
3822 
3823    Level: developer
3824 
3825 .keywords: matrix, aij, compressed row, sparse
3826 
3827 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3828 @*/
3829 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3830 {
3831   PetscErrorCode ierr;
3832 
3833   PetscFunctionBegin;
3834   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3835   PetscFunctionReturn(0);
3836 }
3837 
3838 
3839 #undef __FUNCT__
3840 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3841 /*@
3842      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3843 
3844      Collective on MPI_Comm
3845 
3846    Input Parameters:
3847 +  comm - must be an MPI communicator of size 1
3848 .  bs - size of block
3849 .  m - number of rows
3850 .  n - number of columns
3851 .  i - row indices
3852 .  j - column indices
3853 -  a - matrix values
3854 
3855    Output Parameter:
3856 .  mat - the matrix
3857 
3858    Level: advanced
3859 
3860    Notes:
3861        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3862     once the matrix is destroyed
3863 
3864        You cannot set new nonzero locations into this matrix, that will generate an error.
3865 
3866        The i and j indices are 0 based
3867 
3868        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3869 
3870 
3871 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3872 
3873 @*/
3874 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3875 {
3876   PetscErrorCode ierr;
3877   PetscInt       ii;
3878   Mat_SeqBAIJ    *baij;
3879 
3880   PetscFunctionBegin;
3881   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3882   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3883 
3884   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3885   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3886   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3887   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3888   baij = (Mat_SeqBAIJ*)(*mat)->data;
3889   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3890   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3891 
3892   baij->i = i;
3893   baij->j = j;
3894   baij->a = a;
3895   baij->singlemalloc = PETSC_FALSE;
3896   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3897   baij->free_a       = PETSC_FALSE;
3898   baij->free_ij      = PETSC_FALSE;
3899 
3900   for (ii=0; ii<m; ii++) {
3901     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3902 #if defined(PETSC_USE_DEBUG)
3903     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3904 #endif
3905   }
3906 #if defined(PETSC_USE_DEBUG)
3907   for (ii=0; ii<baij->i[m]; ii++) {
3908     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3909     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3910   }
3911 #endif
3912 
3913   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3914   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3915   PetscFunctionReturn(0);
3916 }
3917