xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f692024ea6ceda132bc9ff30ca7a31e85cfbbcb2)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs){
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53 	diag    += 4;
54 	mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67 	diag    += 9;
68 	mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77 	diag  += 16;
78 	mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87 	diag  += 25;
88 	mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97 	diag  += 36;
98 	mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107 	diag  += 49;
108 	mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118 	diag  += bs2;
119 	mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253 	v     = aa + 4*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258 	  idx  = 2*vi[j];
259           it   = 4*j;
260 	  x1   = x[idx]; x2 = x[1+idx];
261 	  s1  -= v[it]*x1 + v[it+2]*x2;
262 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296 	v     = aa + 4*(diag[i]+1);
297 	vi    = aj + diag[i] + 1;
298 	nz    = ai[i+1] - diag[i] - 1;
299 	s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301  	  idx  = 2*vi[j];
302           it   = 4*j;
303 	  x1   = x[idx]; x2 = x[1+idx];
304 	  s1  -= v[it]*x1 + v[it+2]*x2;
305 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
306 	}
307 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470 	v     = aa + 16*ai[i];
471 	vi    = aj + ai[i];
472 	nz    = diag[i] - ai[i];
473 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474 	while (nz--) {
475 	  idx  = 4*(*vi++);
476 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481 	  v   += 16;
482 	}
483 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521 	v     = aa + 16*(diag[i]+1);
522 	vi    = aj + diag[i] + 1;
523 	nz    = ai[i+1] - diag[i] - 1;
524 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525 	while (nz--) {
526 	  idx  = 4*(*vi++);
527 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532 	  v   += 16;
533 	}
534 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589 	v     = aa + 25*ai[i];
590 	vi    = aj + ai[i];
591 	nz    = diag[i] - ai[i];
592 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593 	while (nz--) {
594 	  idx  = 5*(*vi++);
595 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601 	  v   += 25;
602 	}
603 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644 	v     = aa + 25*(diag[i]+1);
645 	vi    = aj + diag[i] + 1;
646 	nz    = ai[i+1] - diag[i] - 1;
647 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648 	while (nz--) {
649 	  idx  = 5*(*vi++);
650 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656 	  v   += 25;
657 	}
658 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
987       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999 	/* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014 	  } */
1015 
1016         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063 	/* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078 	  } */
1079 
1080         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol) low = 0; else high = nrow;
1132       lastcol = col;
1133       value = v + k*(stepval+4 + l)*4;
1134       while (high-low > 7) {
1135         t = (low+high)/2;
1136         if (rp[t] > col) high = t;
1137         else             low  = t;
1138       }
1139       for (i=low; i<high; i++) {
1140         if (rp[i] > col) break;
1141         if (rp[i] == col) {
1142           bap  = ap +  16*i;
1143           for (ii=0; ii<4; ii++,value+=stepval) {
1144             for (jj=ii; jj<16; jj+=4) {
1145               bap[jj] += *value++;
1146             }
1147           }
1148           goto noinsert2;
1149         }
1150       }
1151       N = nrow++ - 1;
1152       high++; /* added new column index thus must search to one higher than before */
1153       /* shift up all the later entries in this row */
1154       for (ii=N; ii>=i; ii--) {
1155         rp[ii+1] = rp[ii];
1156         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1157       }
1158       if (N >= i) {
1159         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1160       }
1161       rp[i] = col;
1162       bap   = ap +  16*i;
1163       for (ii=0; ii<4; ii++,value+=stepval) {
1164         for (jj=ii; jj<16; jj+=4) {
1165           bap[jj] = *value++;
1166         }
1167       }
1168       noinsert2:;
1169       low = i;
1170     }
1171     ailen[row] = nrow;
1172   }
1173   PetscFunctionReturnVoid();
1174 }
1175 EXTERN_C_END
1176 
1177 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1178 #define matsetvalues4_ MATSETVALUES4
1179 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1180 #define matsetvalues4_ matsetvalues4
1181 #endif
1182 
1183 EXTERN_C_BEGIN
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetValues4_"
1186 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1187 {
1188   Mat         A = *AA;
1189   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1190   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1191   PetscInt    *ai=a->i,*ailen=a->ilen;
1192   PetscInt    *aj=a->j,brow,bcol;
1193   PetscInt    ridx,cidx,lastcol = -1;
1194   MatScalar   *ap,value,*aa=a->a,*bap;
1195 
1196   PetscFunctionBegin;
1197   for (k=0; k<m; k++) { /* loop over added rows */
1198     row  = im[k]; brow = row/4;
1199     rp   = aj + ai[brow];
1200     ap   = aa + 16*ai[brow];
1201     nrow = ailen[brow];
1202     low  = 0;
1203     high = nrow;
1204     for (l=0; l<n; l++) { /* loop over added columns */
1205       col = in[l]; bcol = col/4;
1206       ridx = row % 4; cidx = col % 4;
1207       value = v[l + k*n];
1208       if (col <= lastcol) low = 0; else high = nrow;
1209       lastcol = col;
1210       while (high-low > 7) {
1211         t = (low+high)/2;
1212         if (rp[t] > bcol) high = t;
1213         else              low  = t;
1214       }
1215       for (i=low; i<high; i++) {
1216         if (rp[i] > bcol) break;
1217         if (rp[i] == bcol) {
1218           bap  = ap +  16*i + 4*cidx + ridx;
1219           *bap += value;
1220           goto noinsert1;
1221         }
1222       }
1223       N = nrow++ - 1;
1224       high++; /* added new column thus must search to one higher than before */
1225       /* shift up all the later entries in this row */
1226       for (ii=N; ii>=i; ii--) {
1227         rp[ii+1] = rp[ii];
1228         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1229       }
1230       if (N>=i) {
1231         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1232       }
1233       rp[i]                    = bcol;
1234       ap[16*i + 4*cidx + ridx] = value;
1235       noinsert1:;
1236       low = i;
1237     }
1238     ailen[brow] = nrow;
1239   }
1240   PetscFunctionReturnVoid();
1241 }
1242 EXTERN_C_END
1243 
1244 /*
1245      Checks for missing diagonals
1246 */
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1249 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1250 {
1251   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1252   PetscErrorCode ierr;
1253   PetscInt       *diag,*jj = a->j,i;
1254 
1255   PetscFunctionBegin;
1256   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1257   *missing = PETSC_FALSE;
1258   if (A->rmap->n > 0 && !jj) {
1259     *missing  = PETSC_TRUE;
1260     if (d) *d = 0;
1261     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1262   } else {
1263     diag     = a->diag;
1264     for (i=0; i<a->mbs; i++) {
1265       if (jj[diag[i]] != i) {
1266         *missing  = PETSC_TRUE;
1267         if (d) *d = i;
1268         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1269       }
1270     }
1271   }
1272   PetscFunctionReturn(0);
1273 }
1274 
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1277 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1278 {
1279   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281   PetscInt       i,j,m = a->mbs;
1282 
1283   PetscFunctionBegin;
1284   if (!a->diag) {
1285     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1286     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1287     a->free_diag = PETSC_TRUE;
1288   }
1289   for (i=0; i<m; i++) {
1290     a->diag[i] = a->i[i+1];
1291     for (j=a->i[i]; j<a->i[i+1]; j++) {
1292       if (a->j[j] == i) {
1293         a->diag[i] = j;
1294         break;
1295       }
1296     }
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 
1302 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1306 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1307 {
1308   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1309   PetscErrorCode ierr;
1310   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1311   PetscInt       *tia, *tja;
1312 
1313   PetscFunctionBegin;
1314   *nn = n;
1315   if (!ia) PetscFunctionReturn(0);
1316   if (symmetric) {
1317     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1318     nz   = tia[n];
1319   } else {
1320     tia = a->i; tja = a->j;
1321   }
1322 
1323   if (!blockcompressed && bs > 1) {
1324     (*nn) *= bs;
1325     /* malloc & create the natural set of indices */
1326     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1327     if (n) {
1328       (*ia)[0] = 0;
1329       for (j=1; j<bs; j++) {
1330         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1331       }
1332     }
1333 
1334     for (i=1; i<n; i++) {
1335       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1336       for (j=1; j<bs; j++) {
1337         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1338       }
1339     }
1340     if (n) {
1341       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1342     }
1343 
1344     if (ja) {
1345       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1346       cnt = 0;
1347       for (i=0; i<n; i++) {
1348         for (j=0; j<bs; j++) {
1349           for (k=tia[i]; k<tia[i+1]; k++) {
1350             for (l=0; l<bs; l++) {
1351               (*ja)[cnt++] = bs*tja[k] + l;
1352 	    }
1353           }
1354         }
1355       }
1356     }
1357 
1358     n     *= bs;
1359     nz *= bs*bs;
1360     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1361       ierr = PetscFree(tia);CHKERRQ(ierr);
1362       ierr = PetscFree(tja);CHKERRQ(ierr);
1363     }
1364   } else if (oshift == 1) {
1365     if (symmetric) {
1366       PetscInt nz = tia[A->rmap->n/bs];
1367       /*  add 1 to i and j indices */
1368       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1369       *ia = tia;
1370       if (ja) {
1371 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1372         *ja = tja;
1373       }
1374     } else {
1375       PetscInt nz = a->i[A->rmap->n/bs];
1376       /* malloc space and  add 1 to i and j indices */
1377       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1378       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1379       if (ja) {
1380 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1381 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1382       }
1383     }
1384   } else {
1385     *ia = tia;
1386     if (ja) *ja = tja;
1387   }
1388 
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1394 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1395 {
1396   PetscErrorCode ierr;
1397 
1398   PetscFunctionBegin;
1399   if (!ia) PetscFunctionReturn(0);
1400   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1401     ierr = PetscFree(*ia);CHKERRQ(ierr);
1402     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1409 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1410 {
1411   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415 #if defined(PETSC_USE_LOG)
1416   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1417 #endif
1418   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1419   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1420   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1421   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1422   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1423   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1424   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1425   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1426   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1427   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1428   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1429   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1430   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1431 
1432   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1433   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1434   ierr = PetscFree(A->data);CHKERRQ(ierr);
1435 
1436   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1445   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1446   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1452 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1453 {
1454   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1455   PetscErrorCode ierr;
1456 
1457   PetscFunctionBegin;
1458   switch (op) {
1459   case MAT_ROW_ORIENTED:
1460     a->roworiented    = flg;
1461     break;
1462   case MAT_KEEP_NONZERO_PATTERN:
1463     a->keepnonzeropattern = flg;
1464     break;
1465   case MAT_NEW_NONZERO_LOCATIONS:
1466     a->nonew          = (flg ? 0 : 1);
1467     break;
1468   case MAT_NEW_NONZERO_LOCATION_ERR:
1469     a->nonew          = (flg ? -1 : 0);
1470     break;
1471   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1472     a->nonew          = (flg ? -2 : 0);
1473     break;
1474   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1475     a->nounused       = (flg ? -1 : 0);
1476     break;
1477   case MAT_CHECK_COMPRESSED_ROW:
1478     a->compressedrow.check = flg;
1479     break;
1480   case MAT_NEW_DIAGONALS:
1481   case MAT_IGNORE_OFF_PROC_ENTRIES:
1482   case MAT_USE_HASH_TABLE:
1483     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1484     break;
1485   case MAT_SYMMETRIC:
1486   case MAT_STRUCTURALLY_SYMMETRIC:
1487   case MAT_HERMITIAN:
1488   case MAT_SYMMETRY_ETERNAL:
1489     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1490     break;
1491   default:
1492     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1493   }
1494   PetscFunctionReturn(0);
1495 }
1496 
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1499 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1500 {
1501   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1502   PetscErrorCode ierr;
1503   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1504   MatScalar      *aa,*aa_i;
1505   PetscScalar    *v_i;
1506 
1507   PetscFunctionBegin;
1508   bs  = A->rmap->bs;
1509   ai  = a->i;
1510   aj  = a->j;
1511   aa  = a->a;
1512   bs2 = a->bs2;
1513 
1514   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1515 
1516   bn  = row/bs;   /* Block number */
1517   bp  = row % bs; /* Block Position */
1518   M   = ai[bn+1] - ai[bn];
1519   *nz = bs*M;
1520 
1521   if (v) {
1522     *v = 0;
1523     if (*nz) {
1524       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1525       for (i=0; i<M; i++) { /* for each block in the block row */
1526         v_i  = *v + i*bs;
1527         aa_i = aa + bs2*(ai[bn] + i);
1528         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1529       }
1530     }
1531   }
1532 
1533   if (idx) {
1534     *idx = 0;
1535     if (*nz) {
1536       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1537       for (i=0; i<M; i++) { /* for each block in the block row */
1538         idx_i = *idx + i*bs;
1539         itmp  = bs*aj[ai[bn] + i];
1540         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1541       }
1542     }
1543   }
1544   PetscFunctionReturn(0);
1545 }
1546 
1547 #undef __FUNCT__
1548 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1549 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1550 {
1551   PetscErrorCode ierr;
1552 
1553   PetscFunctionBegin;
1554   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1555   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1556   PetscFunctionReturn(0);
1557 }
1558 
1559 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1560 
1561 #undef __FUNCT__
1562 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1563 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1564 {
1565   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1566   Mat            C;
1567   PetscErrorCode ierr;
1568   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1569   PetscInt       *rows,*cols,bs2=a->bs2;
1570   MatScalar      *array;
1571 
1572   PetscFunctionBegin;
1573   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1574   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1575     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1576     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1577 
1578     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1579     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1580     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1581     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1582     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1583     ierr = PetscFree(col);CHKERRQ(ierr);
1584   } else {
1585     C = *B;
1586   }
1587 
1588   array = a->a;
1589   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1590   for (i=0; i<mbs; i++) {
1591     cols[0] = i*bs;
1592     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1593     len = ai[i+1] - ai[i];
1594     for (j=0; j<len; j++) {
1595       rows[0] = (*aj++)*bs;
1596       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1597       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1598       array += bs2;
1599     }
1600   }
1601   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1602 
1603   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1604   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1605 
1606   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1607     *B = C;
1608   } else {
1609     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1610   }
1611   PetscFunctionReturn(0);
1612 }
1613 
1614 EXTERN_C_BEGIN
1615 #undef __FUNCT__
1616 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1617 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1618 {
1619   PetscErrorCode ierr;
1620   Mat            Btrans;
1621 
1622   PetscFunctionBegin;
1623   *f = PETSC_FALSE;
1624   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1625   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1626   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1627   PetscFunctionReturn(0);
1628 }
1629 EXTERN_C_END
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1633 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1634 {
1635   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1636   PetscErrorCode ierr;
1637   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1638   int            fd;
1639   PetscScalar    *aa;
1640   FILE           *file;
1641 
1642   PetscFunctionBegin;
1643   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1644   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1645   col_lens[0] = MAT_FILE_CLASSID;
1646 
1647   col_lens[1] = A->rmap->N;
1648   col_lens[2] = A->cmap->n;
1649   col_lens[3] = a->nz*bs2;
1650 
1651   /* store lengths of each row and write (including header) to file */
1652   count = 0;
1653   for (i=0; i<a->mbs; i++) {
1654     for (j=0; j<bs; j++) {
1655       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1656     }
1657   }
1658   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1659   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1660 
1661   /* store column indices (zero start index) */
1662   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1663   count = 0;
1664   for (i=0; i<a->mbs; i++) {
1665     for (j=0; j<bs; j++) {
1666       for (k=a->i[i]; k<a->i[i+1]; k++) {
1667         for (l=0; l<bs; l++) {
1668           jj[count++] = bs*a->j[k] + l;
1669         }
1670       }
1671     }
1672   }
1673   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1674   ierr = PetscFree(jj);CHKERRQ(ierr);
1675 
1676   /* store nonzero values */
1677   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1678   count = 0;
1679   for (i=0; i<a->mbs; i++) {
1680     for (j=0; j<bs; j++) {
1681       for (k=a->i[i]; k<a->i[i+1]; k++) {
1682         for (l=0; l<bs; l++) {
1683           aa[count++] = a->a[bs2*k + l*bs + j];
1684         }
1685       }
1686     }
1687   }
1688   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1689   ierr = PetscFree(aa);CHKERRQ(ierr);
1690 
1691   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1692   if (file) {
1693     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1694   }
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 #undef __FUNCT__
1699 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1700 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1701 {
1702   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1703   PetscErrorCode    ierr;
1704   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1705   PetscViewerFormat format;
1706 
1707   PetscFunctionBegin;
1708   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1709   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1710     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1711   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1712     Mat aij;
1713     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1714     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1715     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1716   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1717      PetscFunctionReturn(0);
1718   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1719     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1720     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1721     for (i=0; i<a->mbs; i++) {
1722       for (j=0; j<bs; j++) {
1723         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1724         for (k=a->i[i]; k<a->i[i+1]; k++) {
1725           for (l=0; l<bs; l++) {
1726 #if defined(PETSC_USE_COMPLEX)
1727             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1728               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1729                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1730             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1731               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1732                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1733             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1734               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1735             }
1736 #else
1737             if (a->a[bs2*k + l*bs + j] != 0.0) {
1738               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1739             }
1740 #endif
1741           }
1742         }
1743         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1744       }
1745     }
1746     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1747   } else {
1748     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1749     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1750     for (i=0; i<a->mbs; i++) {
1751       for (j=0; j<bs; j++) {
1752         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1753         for (k=a->i[i]; k<a->i[i+1]; k++) {
1754           for (l=0; l<bs; l++) {
1755 #if defined(PETSC_USE_COMPLEX)
1756             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1757               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1758                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1759             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1760               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1761                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1762             } else {
1763               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1764             }
1765 #else
1766             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1767 #endif
1768           }
1769         }
1770         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1771       }
1772     }
1773     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1774   }
1775   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1781 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1782 {
1783   Mat            A = (Mat) Aa;
1784   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1785   PetscErrorCode ierr;
1786   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1787   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1788   MatScalar      *aa;
1789   PetscViewer    viewer;
1790   PetscViewerFormat format;
1791 
1792   PetscFunctionBegin;
1793   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1794   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1795 
1796   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1797 
1798   /* loop over matrix elements drawing boxes */
1799 
1800   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1801     color = PETSC_DRAW_BLUE;
1802     for (i=0,row=0; i<mbs; i++,row+=bs) {
1803       for (j=a->i[i]; j<a->i[i+1]; j++) {
1804         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1805         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1806         aa = a->a + j*bs2;
1807         for (k=0; k<bs; k++) {
1808           for (l=0; l<bs; l++) {
1809             if (PetscRealPart(*aa++) >=  0.) continue;
1810             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1811           }
1812         }
1813       }
1814     }
1815     color = PETSC_DRAW_CYAN;
1816     for (i=0,row=0; i<mbs; i++,row+=bs) {
1817       for (j=a->i[i]; j<a->i[i+1]; j++) {
1818         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1819         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1820         aa = a->a + j*bs2;
1821         for (k=0; k<bs; k++) {
1822           for (l=0; l<bs; l++) {
1823             if (PetscRealPart(*aa++) != 0.) continue;
1824             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1825           }
1826         }
1827       }
1828     }
1829     color = PETSC_DRAW_RED;
1830     for (i=0,row=0; i<mbs; i++,row+=bs) {
1831       for (j=a->i[i]; j<a->i[i+1]; j++) {
1832         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1833         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1834         aa = a->a + j*bs2;
1835         for (k=0; k<bs; k++) {
1836           for (l=0; l<bs; l++) {
1837             if (PetscRealPart(*aa++) <= 0.) continue;
1838             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1839           }
1840         }
1841       }
1842     }
1843   } else {
1844     /* use contour shading to indicate magnitude of values */
1845     /* first determine max of all nonzero values */
1846     PetscDraw   popup;
1847     PetscReal scale,maxv = 0.0;
1848 
1849     for (i=0; i<a->nz*a->bs2; i++) {
1850       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1851     }
1852     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1853     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1854     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1855     for (i=0,row=0; i<mbs; i++,row+=bs) {
1856       for (j=a->i[i]; j<a->i[i+1]; j++) {
1857         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1858         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1859         aa = a->a + j*bs2;
1860         for (k=0; k<bs; k++) {
1861           for (l=0; l<bs; l++) {
1862             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1863             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1864           }
1865         }
1866       }
1867     }
1868   }
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1874 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1875 {
1876   PetscErrorCode ierr;
1877   PetscReal      xl,yl,xr,yr,w,h;
1878   PetscDraw      draw;
1879   PetscBool      isnull;
1880 
1881   PetscFunctionBegin;
1882 
1883   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1884   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1885 
1886   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1887   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1888   xr += w;    yr += h;  xl = -w;     yl = -h;
1889   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1890   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1891   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1892   PetscFunctionReturn(0);
1893 }
1894 
1895 #undef __FUNCT__
1896 #define __FUNCT__ "MatView_SeqBAIJ"
1897 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1898 {
1899   PetscErrorCode ierr;
1900   PetscBool      iascii,isbinary,isdraw;
1901 
1902   PetscFunctionBegin;
1903   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1904   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1905   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1906   if (iascii){
1907     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1908   } else if (isbinary) {
1909     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1910   } else if (isdraw) {
1911     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1912   } else {
1913     Mat B;
1914     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1915     ierr = MatView(B,viewer);CHKERRQ(ierr);
1916     ierr = MatDestroy(&B);CHKERRQ(ierr);
1917   }
1918   PetscFunctionReturn(0);
1919 }
1920 
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1924 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1925 {
1926   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1927   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1928   PetscInt    *ai = a->i,*ailen = a->ilen;
1929   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1930   MatScalar   *ap,*aa = a->a;
1931 
1932   PetscFunctionBegin;
1933   for (k=0; k<m; k++) { /* loop over rows */
1934     row  = im[k]; brow = row/bs;
1935     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1936     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1937     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1938     nrow = ailen[brow];
1939     for (l=0; l<n; l++) { /* loop over columns */
1940       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1941       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1942       col  = in[l] ;
1943       bcol = col/bs;
1944       cidx = col%bs;
1945       ridx = row%bs;
1946       high = nrow;
1947       low  = 0; /* assume unsorted */
1948       while (high-low > 5) {
1949         t = (low+high)/2;
1950         if (rp[t] > bcol) high = t;
1951         else             low  = t;
1952       }
1953       for (i=low; i<high; i++) {
1954         if (rp[i] > bcol) break;
1955         if (rp[i] == bcol) {
1956           *v++ = ap[bs2*i+bs*cidx+ridx];
1957           goto finished;
1958         }
1959       }
1960       *v++ = 0.0;
1961       finished:;
1962     }
1963   }
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1969 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1970 {
1971   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1972   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1973   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1974   PetscErrorCode    ierr;
1975   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1976   PetscBool         roworiented=a->roworiented;
1977   const PetscScalar *value = v;
1978   MatScalar         *ap,*aa = a->a,*bap;
1979 
1980   PetscFunctionBegin;
1981   if (roworiented) {
1982     stepval = (n-1)*bs;
1983   } else {
1984     stepval = (m-1)*bs;
1985   }
1986   for (k=0; k<m; k++) { /* loop over added rows */
1987     row  = im[k];
1988     if (row < 0) continue;
1989 #if defined(PETSC_USE_DEBUG)
1990     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1991 #endif
1992     rp   = aj + ai[row];
1993     ap   = aa + bs2*ai[row];
1994     rmax = imax[row];
1995     nrow = ailen[row];
1996     low  = 0;
1997     high = nrow;
1998     for (l=0; l<n; l++) { /* loop over added columns */
1999       if (in[l] < 0) continue;
2000 #if defined(PETSC_USE_DEBUG)
2001       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2002 #endif
2003       col = in[l];
2004       if (roworiented) {
2005         value = v + (k*(stepval+bs) + l)*bs;
2006       } else {
2007         value = v + (l*(stepval+bs) + k)*bs;
2008       }
2009       if (col <= lastcol) low = 0; else high = nrow;
2010       lastcol = col;
2011       while (high-low > 7) {
2012         t = (low+high)/2;
2013         if (rp[t] > col) high = t;
2014         else             low  = t;
2015       }
2016       for (i=low; i<high; i++) {
2017         if (rp[i] > col) break;
2018         if (rp[i] == col) {
2019           bap  = ap +  bs2*i;
2020           if (roworiented) {
2021             if (is == ADD_VALUES) {
2022               for (ii=0; ii<bs; ii++,value+=stepval) {
2023                 for (jj=ii; jj<bs2; jj+=bs) {
2024                   bap[jj] += *value++;
2025                 }
2026               }
2027             } else {
2028               for (ii=0; ii<bs; ii++,value+=stepval) {
2029                 for (jj=ii; jj<bs2; jj+=bs) {
2030                   bap[jj] = *value++;
2031                 }
2032               }
2033             }
2034           } else {
2035             if (is == ADD_VALUES) {
2036               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2037                 for (jj=0; jj<bs; jj++) {
2038                   bap[jj] += value[jj];
2039                 }
2040                 bap += bs;
2041               }
2042             } else {
2043               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2044                 for (jj=0; jj<bs; jj++) {
2045                   bap[jj]  = value[jj];
2046                 }
2047                 bap += bs;
2048               }
2049             }
2050           }
2051           goto noinsert2;
2052         }
2053       }
2054       if (nonew == 1) goto noinsert2;
2055       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2056       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2057       N = nrow++ - 1; high++;
2058       /* shift up all the later entries in this row */
2059       for (ii=N; ii>=i; ii--) {
2060         rp[ii+1] = rp[ii];
2061         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2062       }
2063       if (N >= i) {
2064         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2065       }
2066       rp[i] = col;
2067       bap   = ap +  bs2*i;
2068       if (roworiented) {
2069         for (ii=0; ii<bs; ii++,value+=stepval) {
2070           for (jj=ii; jj<bs2; jj+=bs) {
2071             bap[jj] = *value++;
2072           }
2073         }
2074       } else {
2075         for (ii=0; ii<bs; ii++,value+=stepval) {
2076           for (jj=0; jj<bs; jj++) {
2077             *bap++  = *value++;
2078           }
2079         }
2080       }
2081       noinsert2:;
2082       low = i;
2083     }
2084     ailen[row] = nrow;
2085   }
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #undef __FUNCT__
2090 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2091 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2092 {
2093   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2094   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2095   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2096   PetscErrorCode ierr;
2097   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2098   MatScalar      *aa = a->a,*ap;
2099   PetscReal      ratio=0.6;
2100 
2101   PetscFunctionBegin;
2102   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2103 
2104   if (m) rmax = ailen[0];
2105   for (i=1; i<mbs; i++) {
2106     /* move each row back by the amount of empty slots (fshift) before it*/
2107     fshift += imax[i-1] - ailen[i-1];
2108     rmax   = PetscMax(rmax,ailen[i]);
2109     if (fshift) {
2110       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2111       N = ailen[i];
2112       for (j=0; j<N; j++) {
2113         ip[j-fshift] = ip[j];
2114         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2115       }
2116     }
2117     ai[i] = ai[i-1] + ailen[i-1];
2118   }
2119   if (mbs) {
2120     fshift += imax[mbs-1] - ailen[mbs-1];
2121     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2122   }
2123   /* reset ilen and imax for each row */
2124   for (i=0; i<mbs; i++) {
2125     ailen[i] = imax[i] = ai[i+1] - ai[i];
2126   }
2127   a->nz = ai[mbs];
2128 
2129   /* diagonals may have moved, so kill the diagonal pointers */
2130   a->idiagvalid = PETSC_FALSE;
2131   if (fshift && a->diag) {
2132     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2133     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2134     a->diag = 0;
2135   }
2136   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2137   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2138   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2139   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2140   A->info.mallocs     += a->reallocs;
2141   a->reallocs          = 0;
2142   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2143 
2144   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2145   A->same_nonzero = PETSC_TRUE;
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 /*
2150    This function returns an array of flags which indicate the locations of contiguous
2151    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2152    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2153    Assume: sizes should be long enough to hold all the values.
2154 */
2155 #undef __FUNCT__
2156 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2157 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2158 {
2159   PetscInt   i,j,k,row;
2160   PetscBool  flg;
2161 
2162   PetscFunctionBegin;
2163   for (i=0,j=0; i<n; j++) {
2164     row = idx[i];
2165     if (row%bs!=0) { /* Not the begining of a block */
2166       sizes[j] = 1;
2167       i++;
2168     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2169       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2170       i++;
2171     } else { /* Begining of the block, so check if the complete block exists */
2172       flg = PETSC_TRUE;
2173       for (k=1; k<bs; k++) {
2174         if (row+k != idx[i+k]) { /* break in the block */
2175           flg = PETSC_FALSE;
2176           break;
2177         }
2178       }
2179       if (flg) { /* No break in the bs */
2180         sizes[j] = bs;
2181         i+= bs;
2182       } else {
2183         sizes[j] = 1;
2184         i++;
2185       }
2186     }
2187   }
2188   *bs_max = j;
2189   PetscFunctionReturn(0);
2190 }
2191 
2192 #undef __FUNCT__
2193 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2194 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2195 {
2196   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2197   PetscErrorCode    ierr;
2198   PetscInt          i,j,k,count,*rows;
2199   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2200   PetscScalar       zero = 0.0;
2201   MatScalar         *aa;
2202   const PetscScalar *xx;
2203   PetscScalar       *bb;
2204 
2205   PetscFunctionBegin;
2206   /* fix right hand side if needed */
2207   if (x && b) {
2208     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2209     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2210     for (i=0; i<is_n; i++) {
2211       bb[is_idx[i]] = diag*xx[is_idx[i]];
2212     }
2213     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2214     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2215   }
2216 
2217   /* Make a copy of the IS and  sort it */
2218   /* allocate memory for rows,sizes */
2219   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2220 
2221   /* copy IS values to rows, and sort them */
2222   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2223   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2224 
2225   if (baij->keepnonzeropattern) {
2226     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2227     bs_max = is_n;
2228     A->same_nonzero = PETSC_TRUE;
2229   } else {
2230     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2231     A->same_nonzero = PETSC_FALSE;
2232   }
2233 
2234   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2235     row   = rows[j];
2236     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2237     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2238     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2239     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2240       if (diag != (PetscScalar)0.0) {
2241         if (baij->ilen[row/bs] > 0) {
2242           baij->ilen[row/bs]       = 1;
2243           baij->j[baij->i[row/bs]] = row/bs;
2244           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2245         }
2246         /* Now insert all the diagonal values for this bs */
2247         for (k=0; k<bs; k++) {
2248           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2249         }
2250       } else { /* (diag == 0.0) */
2251         baij->ilen[row/bs] = 0;
2252       } /* end (diag == 0.0) */
2253     } else { /* (sizes[i] != bs) */
2254 #if defined (PETSC_USE_DEBUG)
2255       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2256 #endif
2257       for (k=0; k<count; k++) {
2258         aa[0] =  zero;
2259         aa    += bs;
2260       }
2261       if (diag != (PetscScalar)0.0) {
2262         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2263       }
2264     }
2265   }
2266 
2267   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2268   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2269   PetscFunctionReturn(0);
2270 }
2271 
2272 #undef __FUNCT__
2273 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2274 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2275 {
2276   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2277   PetscErrorCode    ierr;
2278   PetscInt          i,j,k,count;
2279   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2280   PetscScalar       zero = 0.0;
2281   MatScalar         *aa;
2282   const PetscScalar *xx;
2283   PetscScalar       *bb;
2284   PetscBool         *zeroed,vecs = PETSC_FALSE;
2285 
2286   PetscFunctionBegin;
2287   /* fix right hand side if needed */
2288   if (x && b) {
2289     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2290     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2291     vecs = PETSC_TRUE;
2292   }
2293   A->same_nonzero = PETSC_TRUE;
2294 
2295   /* zero the columns */
2296   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2297   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2298   for (i=0; i<is_n; i++) {
2299     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2300     zeroed[is_idx[i]] = PETSC_TRUE;
2301   }
2302   for (i=0; i<A->rmap->N; i++) {
2303     if (!zeroed[i]) {
2304       row = i/bs;
2305       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2306         for (k=0; k<bs; k++) {
2307           col = bs*baij->j[j] + k;
2308 	  if (zeroed[col]) {
2309 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2310             if (vecs) bb[i] -= aa[0]*xx[col];
2311             aa[0] = 0.0;
2312           }
2313         }
2314       }
2315     } else if (vecs) bb[i] = diag*xx[i];
2316   }
2317   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2318   if (vecs) {
2319     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2320     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2321   }
2322 
2323   /* zero the rows */
2324   for (i=0; i<is_n; i++) {
2325     row   = is_idx[i];
2326     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2327     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2328     for (k=0; k<count; k++) {
2329       aa[0] =  zero;
2330       aa    += bs;
2331     }
2332     if (diag != (PetscScalar)0.0) {
2333       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2334     }
2335   }
2336   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2342 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2343 {
2344   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2345   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2346   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2347   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2348   PetscErrorCode ierr;
2349   PetscInt       ridx,cidx,bs2=a->bs2;
2350   PetscBool      roworiented=a->roworiented;
2351   MatScalar      *ap,value,*aa=a->a,*bap;
2352 
2353   PetscFunctionBegin;
2354   if (v) PetscValidScalarPointer(v,6);
2355   for (k=0; k<m; k++) { /* loop over added rows */
2356     row  = im[k];
2357     brow = row/bs;
2358     if (row < 0) continue;
2359 #if defined(PETSC_USE_DEBUG)
2360     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2361 #endif
2362     rp   = aj + ai[brow];
2363     ap   = aa + bs2*ai[brow];
2364     rmax = imax[brow];
2365     nrow = ailen[brow];
2366     low  = 0;
2367     high = nrow;
2368     for (l=0; l<n; l++) { /* loop over added columns */
2369       if (in[l] < 0) continue;
2370 #if defined(PETSC_USE_DEBUG)
2371       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2372 #endif
2373       col = in[l]; bcol = col/bs;
2374       ridx = row % bs; cidx = col % bs;
2375       if (roworiented) {
2376         value = v[l + k*n];
2377       } else {
2378         value = v[k + l*m];
2379       }
2380       if (col <= lastcol) low = 0; else high = nrow;
2381       lastcol = col;
2382       while (high-low > 7) {
2383         t = (low+high)/2;
2384         if (rp[t] > bcol) high = t;
2385         else              low  = t;
2386       }
2387       for (i=low; i<high; i++) {
2388         if (rp[i] > bcol) break;
2389         if (rp[i] == bcol) {
2390           bap  = ap +  bs2*i + bs*cidx + ridx;
2391           if (is == ADD_VALUES) *bap += value;
2392           else                  *bap  = value;
2393           goto noinsert1;
2394         }
2395       }
2396       if (nonew == 1) goto noinsert1;
2397       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2398       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2399       N = nrow++ - 1; high++;
2400       /* shift up all the later entries in this row */
2401       for (ii=N; ii>=i; ii--) {
2402         rp[ii+1] = rp[ii];
2403         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2404       }
2405       if (N>=i) {
2406         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2407       }
2408       rp[i]                      = bcol;
2409       ap[bs2*i + bs*cidx + ridx] = value;
2410       a->nz++;
2411       noinsert1:;
2412       low = i;
2413     }
2414     ailen[brow] = nrow;
2415   }
2416   A->same_nonzero = PETSC_FALSE;
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2422 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2423 {
2424   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2425   Mat            outA;
2426   PetscErrorCode ierr;
2427   PetscBool      row_identity,col_identity;
2428 
2429   PetscFunctionBegin;
2430   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2431   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2432   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2433   if (!row_identity || !col_identity) {
2434     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2435   }
2436 
2437   outA            = inA;
2438   inA->factortype = MAT_FACTOR_LU;
2439 
2440   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2441 
2442   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2443   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2444   a->row = row;
2445   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2446   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2447   a->col = col;
2448 
2449   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2450   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2451    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2452   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2453 
2454   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2455   if (!a->solve_work) {
2456     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2457     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2458   }
2459   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2460 
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 EXTERN_C_BEGIN
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2467 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2468 {
2469   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2470   PetscInt    i,nz,mbs;
2471 
2472   PetscFunctionBegin;
2473   nz  = baij->maxnz;
2474   mbs = baij->mbs;
2475   for (i=0; i<nz; i++) {
2476     baij->j[i] = indices[i];
2477   }
2478   baij->nz = nz;
2479   for (i=0; i<mbs; i++) {
2480     baij->ilen[i] = baij->imax[i];
2481   }
2482   PetscFunctionReturn(0);
2483 }
2484 EXTERN_C_END
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2488 /*@
2489     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2490        in the matrix.
2491 
2492   Input Parameters:
2493 +  mat - the SeqBAIJ matrix
2494 -  indices - the column indices
2495 
2496   Level: advanced
2497 
2498   Notes:
2499     This can be called if you have precomputed the nonzero structure of the
2500   matrix and want to provide it to the matrix object to improve the performance
2501   of the MatSetValues() operation.
2502 
2503     You MUST have set the correct numbers of nonzeros per row in the call to
2504   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2505 
2506     MUST be called before any calls to MatSetValues();
2507 
2508 @*/
2509 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2510 {
2511   PetscErrorCode ierr;
2512 
2513   PetscFunctionBegin;
2514   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2515   PetscValidPointer(indices,2);
2516   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2522 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2523 {
2524   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2525   PetscErrorCode ierr;
2526   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2527   PetscReal      atmp;
2528   PetscScalar    *x,zero = 0.0;
2529   MatScalar      *aa;
2530   PetscInt       ncols,brow,krow,kcol;
2531 
2532   PetscFunctionBegin;
2533   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2534   bs   = A->rmap->bs;
2535   aa   = a->a;
2536   ai   = a->i;
2537   aj   = a->j;
2538   mbs  = a->mbs;
2539 
2540   ierr = VecSet(v,zero);CHKERRQ(ierr);
2541   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2542   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2543   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2544   for (i=0; i<mbs; i++) {
2545     ncols = ai[1] - ai[0]; ai++;
2546     brow  = bs*i;
2547     for (j=0; j<ncols; j++){
2548       for (kcol=0; kcol<bs; kcol++){
2549         for (krow=0; krow<bs; krow++){
2550           atmp = PetscAbsScalar(*aa);aa++;
2551           row = brow + krow;    /* row index */
2552           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2553           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2554         }
2555       }
2556       aj++;
2557     }
2558   }
2559   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 #undef __FUNCT__
2564 #define __FUNCT__ "MatCopy_SeqBAIJ"
2565 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2566 {
2567   PetscErrorCode ierr;
2568 
2569   PetscFunctionBegin;
2570   /* If the two matrices have the same copy implementation, use fast copy. */
2571   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2572     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2573     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2574     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2575 
2576     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2577     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2578     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2579   } else {
2580     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2581   }
2582   PetscFunctionReturn(0);
2583 }
2584 
2585 #undef __FUNCT__
2586 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2587 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2588 {
2589   PetscErrorCode ierr;
2590 
2591   PetscFunctionBegin;
2592   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2598 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2599 {
2600   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2601   PetscFunctionBegin;
2602   *array = a->a;
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 #undef __FUNCT__
2607 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2608 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2609 {
2610   PetscFunctionBegin;
2611   PetscFunctionReturn(0);
2612 }
2613 
2614 #undef __FUNCT__
2615 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2616 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2617 {
2618   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2619   PetscErrorCode ierr;
2620   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2621   PetscBLASInt   one=1;
2622 
2623   PetscFunctionBegin;
2624   if (str == SAME_NONZERO_PATTERN) {
2625     PetscScalar alpha = a;
2626     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2627     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2628   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2629     if (y->xtoy && y->XtoY != X) {
2630       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2631       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2632     }
2633     if (!y->xtoy) { /* get xtoy */
2634       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2635       y->XtoY = X;
2636       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2637     }
2638     for (i=0; i<x->nz; i++) {
2639       j = 0;
2640       while (j < bs2){
2641         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2642         j++;
2643       }
2644     }
2645     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2646   } else {
2647     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2648   }
2649   PetscFunctionReturn(0);
2650 }
2651 
2652 #undef __FUNCT__
2653 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2654 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2655 {
2656   PetscInt rbs,cbs;
2657   PetscErrorCode ierr;
2658 
2659   PetscFunctionBegin;
2660   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2661   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2662   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2663   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2664   PetscFunctionReturn(0);
2665 }
2666 
2667 #undef __FUNCT__
2668 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2669 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2670 {
2671   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2672   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2673   MatScalar      *aa = a->a;
2674 
2675   PetscFunctionBegin;
2676   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2682 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2683 {
2684   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2685   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2686   MatScalar      *aa = a->a;
2687 
2688   PetscFunctionBegin;
2689   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2690   PetscFunctionReturn(0);
2691 }
2692 
2693 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2694 
2695 #undef __FUNCT__
2696 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2697 /*
2698     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2699 */
2700 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2701 {
2702   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2703   PetscErrorCode ierr;
2704   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2705   PetscInt       nz = a->i[m],row,*jj,mr,col;
2706 
2707   PetscFunctionBegin;
2708   *nn = n;
2709   if (!ia) PetscFunctionReturn(0);
2710   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2711   else {
2712     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2713     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2714     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2715     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2716     jj = a->j;
2717     for (i=0; i<nz; i++) {
2718       collengths[jj[i]]++;
2719     }
2720     cia[0] = oshift;
2721     for (i=0; i<n; i++) {
2722       cia[i+1] = cia[i] + collengths[i];
2723     }
2724     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2725     jj   = a->j;
2726     for (row=0; row<m; row++) {
2727       mr = a->i[row+1] - a->i[row];
2728       for (i=0; i<mr; i++) {
2729         col = *jj++;
2730         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2731       }
2732     }
2733     ierr = PetscFree(collengths);CHKERRQ(ierr);
2734     *ia = cia; *ja = cja;
2735   }
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2741 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2742 {
2743   PetscErrorCode ierr;
2744 
2745   PetscFunctionBegin;
2746   if (!ia) PetscFunctionReturn(0);
2747   ierr = PetscFree(*ia);CHKERRQ(ierr);
2748   ierr = PetscFree(*ja);CHKERRQ(ierr);
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2754 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2755 {
2756   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2757   PetscErrorCode ierr;
2758   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2759   PetscScalar    dx,*y,*xx,*w3_array;
2760   PetscScalar    *vscale_array;
2761   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2762   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2763   void           *fctx = coloring->fctx;
2764   PetscBool      flg = PETSC_FALSE;
2765   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2766   Vec            x1_tmp;
2767 
2768   PetscFunctionBegin;
2769   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2770   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2771   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2772   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2773 
2774   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2775   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2776   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2777   if (flg) {
2778     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2779   } else {
2780     PetscBool  assembled;
2781     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2782     if (assembled) {
2783       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2784     }
2785   }
2786 
2787   x1_tmp = x1;
2788   if (!coloring->vscale){
2789     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2790   }
2791 
2792   /*
2793     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2794     coloring->F for the coarser grids from the finest
2795   */
2796   if (coloring->F) {
2797     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2798     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2799     if (m1 != m2) {
2800       coloring->F = 0;
2801       }
2802     }
2803 
2804   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2805     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2806   }
2807   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2808 
2809   /* Set w1 = F(x1) */
2810   if (coloring->F) {
2811     w1          = coloring->F; /* use already computed value of function */
2812     coloring->F = 0;
2813   } else {
2814     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2815     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2816     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2817   }
2818 
2819   if (!coloring->w3) {
2820     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2821     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2822   }
2823   w3 = coloring->w3;
2824 
2825     CHKMEMQ;
2826     /* Compute all the local scale factors, including ghost points */
2827   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2828   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2829   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2830   if (ctype == IS_COLORING_GHOSTED){
2831     col_start = 0; col_end = N;
2832   } else if (ctype == IS_COLORING_GLOBAL){
2833     xx = xx - start;
2834     vscale_array = vscale_array - start;
2835     col_start = start; col_end = N + start;
2836   }    CHKMEMQ;
2837   for (col=col_start; col<col_end; col++){
2838     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2839     if (coloring->htype[0] == 'w') {
2840       dx = 1.0 + unorm;
2841     } else {
2842       dx  = xx[col];
2843     }
2844     if (dx == (PetscScalar)0.0) dx = 1.0;
2845 #if !defined(PETSC_USE_COMPLEX)
2846     if (dx < umin && dx >= 0.0)      dx = umin;
2847     else if (dx < 0.0 && dx > -umin) dx = -umin;
2848 #else
2849     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2850     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2851 #endif
2852     dx               *= epsilon;
2853     vscale_array[col] = (PetscScalar)1.0/dx;
2854   }     CHKMEMQ;
2855   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2856   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2857   if (ctype == IS_COLORING_GLOBAL){
2858     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2859     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2860   }
2861   CHKMEMQ;
2862   if (coloring->vscaleforrow) {
2863     vscaleforrow = coloring->vscaleforrow;
2864   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2865 
2866   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2867   /*
2868     Loop over each color
2869   */
2870   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2871   for (k=0; k<coloring->ncolors; k++) {
2872     coloring->currentcolor = k;
2873     for (i=0; i<bs; i++) {
2874       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2875       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2876       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2877       /*
2878 	Loop over each column associated with color
2879 	adding the perturbation to the vector w3.
2880       */
2881       for (l=0; l<coloring->ncolumns[k]; l++) {
2882 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2883 	if (coloring->htype[0] == 'w') {
2884 	  dx = 1.0 + unorm;
2885 	} else {
2886 	  dx  = xx[col];
2887 	}
2888 	if (dx == (PetscScalar)0.0) dx = 1.0;
2889 #if !defined(PETSC_USE_COMPLEX)
2890 	if (dx < umin && dx >= 0.0)      dx = umin;
2891 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2892 #else
2893 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2894 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2895 #endif
2896 	dx            *= epsilon;
2897 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2898 	w3_array[col] += dx;
2899       }
2900       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2901       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2902 
2903       /*
2904 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2905 	w2 = F(x1 + dx) - F(x1)
2906       */
2907       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2908       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2909       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2910       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2911 
2912       /*
2913 	Loop over rows of vector, putting results into Jacobian matrix
2914       */
2915       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2916       for (l=0; l<coloring->nrows[k]; l++) {
2917 	row    = bs*coloring->rows[k][l];             /* local row index */
2918 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2919         for (j=0; j<bs; j++) {
2920   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2921           srows[j]  = row + start + j;
2922         }
2923 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2924       }
2925       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2926     }
2927   } /* endof for each color */
2928   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2929   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2930   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2931   ierr = PetscFree(srows);CHKERRQ(ierr);
2932 
2933   coloring->currentcolor = -1;
2934   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2935   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2936   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2937   PetscFunctionReturn(0);
2938 }
2939 
2940 /* -------------------------------------------------------------------*/
2941 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2942        MatGetRow_SeqBAIJ,
2943        MatRestoreRow_SeqBAIJ,
2944        MatMult_SeqBAIJ_N,
2945 /* 4*/ MatMultAdd_SeqBAIJ_N,
2946        MatMultTranspose_SeqBAIJ,
2947        MatMultTransposeAdd_SeqBAIJ,
2948        0,
2949        0,
2950        0,
2951 /*10*/ 0,
2952        MatLUFactor_SeqBAIJ,
2953        0,
2954        0,
2955        MatTranspose_SeqBAIJ,
2956 /*15*/ MatGetInfo_SeqBAIJ,
2957        MatEqual_SeqBAIJ,
2958        MatGetDiagonal_SeqBAIJ,
2959        MatDiagonalScale_SeqBAIJ,
2960        MatNorm_SeqBAIJ,
2961 /*20*/ 0,
2962        MatAssemblyEnd_SeqBAIJ,
2963        MatSetOption_SeqBAIJ,
2964        MatZeroEntries_SeqBAIJ,
2965 /*24*/ MatZeroRows_SeqBAIJ,
2966        0,
2967        0,
2968        0,
2969        0,
2970 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2971        0,
2972        0,
2973        MatGetArray_SeqBAIJ,
2974        MatRestoreArray_SeqBAIJ,
2975 /*34*/ MatDuplicate_SeqBAIJ,
2976        0,
2977        0,
2978        MatILUFactor_SeqBAIJ,
2979        0,
2980 /*39*/ MatAXPY_SeqBAIJ,
2981        MatGetSubMatrices_SeqBAIJ,
2982        MatIncreaseOverlap_SeqBAIJ,
2983        MatGetValues_SeqBAIJ,
2984        MatCopy_SeqBAIJ,
2985 /*44*/ 0,
2986        MatScale_SeqBAIJ,
2987        0,
2988        0,
2989        MatZeroRowsColumns_SeqBAIJ,
2990 /*49*/ MatSetBlockSize_SeqBAIJ,
2991        MatGetRowIJ_SeqBAIJ,
2992        MatRestoreRowIJ_SeqBAIJ,
2993        MatGetColumnIJ_SeqBAIJ,
2994        MatRestoreColumnIJ_SeqBAIJ,
2995 /*54*/ MatFDColoringCreate_SeqAIJ,
2996        0,
2997        0,
2998        0,
2999        MatSetValuesBlocked_SeqBAIJ,
3000 /*59*/ MatGetSubMatrix_SeqBAIJ,
3001        MatDestroy_SeqBAIJ,
3002        MatView_SeqBAIJ,
3003        0,
3004        0,
3005 /*64*/ 0,
3006        0,
3007        0,
3008        0,
3009        0,
3010 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3011        0,
3012        MatConvert_Basic,
3013        0,
3014        0,
3015 /*74*/ 0,
3016        MatFDColoringApply_BAIJ,
3017        0,
3018        0,
3019        0,
3020 /*79*/ 0,
3021        0,
3022        0,
3023        0,
3024        MatLoad_SeqBAIJ,
3025 /*84*/ 0,
3026        0,
3027        0,
3028        0,
3029        0,
3030 /*89*/ 0,
3031        0,
3032        0,
3033        0,
3034        0,
3035 /*94*/ 0,
3036        0,
3037        0,
3038        0,
3039        0,
3040 /*99*/0,
3041        0,
3042        0,
3043        0,
3044        0,
3045 /*104*/0,
3046        MatRealPart_SeqBAIJ,
3047        MatImaginaryPart_SeqBAIJ,
3048        0,
3049        0,
3050 /*109*/0,
3051        0,
3052        0,
3053        0,
3054        MatMissingDiagonal_SeqBAIJ,
3055 /*114*/0,
3056        0,
3057        0,
3058        0,
3059        0,
3060 /*119*/0,
3061        0,
3062        MatMultHermitianTranspose_SeqBAIJ,
3063        MatMultHermitianTransposeAdd_SeqBAIJ,
3064        0,
3065 /*124*/0,
3066        0,
3067        MatInvertBlockDiagonal_SeqBAIJ
3068 };
3069 
3070 EXTERN_C_BEGIN
3071 #undef __FUNCT__
3072 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3073 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3074 {
3075   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3076   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3077   PetscErrorCode ierr;
3078 
3079   PetscFunctionBegin;
3080   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3081 
3082   /* allocate space for values if not already there */
3083   if (!aij->saved_values) {
3084     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3085     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3086   }
3087 
3088   /* copy values over */
3089   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3090   PetscFunctionReturn(0);
3091 }
3092 EXTERN_C_END
3093 
3094 EXTERN_C_BEGIN
3095 #undef __FUNCT__
3096 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3097 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3098 {
3099   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3100   PetscErrorCode ierr;
3101   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3102 
3103   PetscFunctionBegin;
3104   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3105   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3106 
3107   /* copy values over */
3108   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3109   PetscFunctionReturn(0);
3110 }
3111 EXTERN_C_END
3112 
3113 EXTERN_C_BEGIN
3114 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3115 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3116 EXTERN_C_END
3117 
3118 EXTERN_C_BEGIN
3119 #undef __FUNCT__
3120 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3121 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3122 {
3123   Mat_SeqBAIJ    *b;
3124   PetscErrorCode ierr;
3125   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3126   PetscBool      flg,skipallocation = PETSC_FALSE;
3127 
3128   PetscFunctionBegin;
3129 
3130   if (nz == MAT_SKIP_ALLOCATION) {
3131     skipallocation = PETSC_TRUE;
3132     nz             = 0;
3133   }
3134 
3135   if (bs < 0) {
3136     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3137       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3138     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3139     bs   = PetscAbs(bs);
3140   }
3141   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3142   bs   = newbs;
3143 
3144   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3145   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3146   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3147   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3148 
3149   B->preallocated = PETSC_TRUE;
3150 
3151   mbs  = B->rmap->n/bs;
3152   nbs  = B->cmap->n/bs;
3153   bs2  = bs*bs;
3154 
3155   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3156 
3157   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3158   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3159   if (nnz) {
3160     for (i=0; i<mbs; i++) {
3161       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3162       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3163     }
3164   }
3165 
3166   b       = (Mat_SeqBAIJ*)B->data;
3167   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3168     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3169   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3170 
3171   if (!flg) {
3172     switch (bs) {
3173     case 1:
3174       B->ops->mult            = MatMult_SeqBAIJ_1;
3175       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3176       B->ops->sor             = MatSOR_SeqBAIJ_1;
3177       break;
3178     case 2:
3179       B->ops->mult            = MatMult_SeqBAIJ_2;
3180       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3181       B->ops->sor             = MatSOR_SeqBAIJ_2;
3182       break;
3183     case 3:
3184       B->ops->mult            = MatMult_SeqBAIJ_3;
3185       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3186       B->ops->sor             = MatSOR_SeqBAIJ_3;
3187       break;
3188     case 4:
3189       B->ops->mult            = MatMult_SeqBAIJ_4;
3190       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3191       B->ops->sor             = MatSOR_SeqBAIJ_4;
3192       break;
3193     case 5:
3194       B->ops->mult            = MatMult_SeqBAIJ_5;
3195       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3196       B->ops->sor             = MatSOR_SeqBAIJ_5;
3197       break;
3198     case 6:
3199       B->ops->mult            = MatMult_SeqBAIJ_6;
3200       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3201       B->ops->sor             = MatSOR_SeqBAIJ_6;
3202       break;
3203     case 7:
3204       B->ops->mult            = MatMult_SeqBAIJ_7;
3205       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3206       B->ops->sor             = MatSOR_SeqBAIJ_7;
3207       break;
3208     case 15:
3209       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3210       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3211       B->ops->sor             = MatSOR_SeqBAIJ_N;
3212       break;
3213     default:
3214       B->ops->mult            = MatMult_SeqBAIJ_N;
3215       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3216       B->ops->sor             = MatSOR_SeqBAIJ_N;
3217       break;
3218     }
3219   }
3220   B->rmap->bs  = bs;
3221   b->mbs       = mbs;
3222   b->nbs       = nbs;
3223   if (!skipallocation) {
3224     if (!b->imax) {
3225       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3226       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3227       b->free_imax_ilen = PETSC_TRUE;
3228     }
3229     /* b->ilen will count nonzeros in each block row so far. */
3230     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3231     if (!nnz) {
3232       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3233       else if (nz < 0) nz = 1;
3234       for (i=0; i<mbs; i++) b->imax[i] = nz;
3235       nz = nz*mbs;
3236     } else {
3237       nz = 0;
3238       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3239     }
3240 
3241     /* allocate the matrix space */
3242     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3243     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3244     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3245     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3246     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3247     b->singlemalloc = PETSC_TRUE;
3248     b->i[0] = 0;
3249     for (i=1; i<mbs+1; i++) {
3250       b->i[i] = b->i[i-1] + b->imax[i-1];
3251     }
3252     b->free_a     = PETSC_TRUE;
3253     b->free_ij    = PETSC_TRUE;
3254   } else {
3255     b->free_a     = PETSC_FALSE;
3256     b->free_ij    = PETSC_FALSE;
3257   }
3258 
3259   B->rmap->bs          = bs;
3260   b->bs2              = bs2;
3261   b->mbs              = mbs;
3262   b->nz               = 0;
3263   b->maxnz            = nz;
3264   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3265   ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3266   PetscFunctionReturn(0);
3267 }
3268 EXTERN_C_END
3269 
3270 EXTERN_C_BEGIN
3271 #undef __FUNCT__
3272 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3273 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3274 {
3275   PetscInt       i,m,nz,nz_max=0,*nnz;
3276   PetscScalar    *values=0;
3277   PetscErrorCode ierr;
3278 
3279   PetscFunctionBegin;
3280   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3281   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3282   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3283   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3284   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3285   m = B->rmap->n/bs;
3286 
3287   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3288   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3289   for(i=0; i<m; i++) {
3290     nz = ii[i+1]- ii[i];
3291     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3292     nz_max = PetscMax(nz_max, nz);
3293     nnz[i] = nz;
3294   }
3295   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3296   ierr = PetscFree(nnz);CHKERRQ(ierr);
3297 
3298   values = (PetscScalar*)V;
3299   if (!values) {
3300     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3301     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3302   }
3303   for (i=0; i<m; i++) {
3304     PetscInt          ncols  = ii[i+1] - ii[i];
3305     const PetscInt    *icols = jj + ii[i];
3306     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3307     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3308   }
3309   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3310   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3311   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3312   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3313   PetscFunctionReturn(0);
3314 }
3315 EXTERN_C_END
3316 
3317 
3318 EXTERN_C_BEGIN
3319 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3320 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3321 #if defined(PETSC_HAVE_MUMPS)
3322 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3323 #endif
3324 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3325 EXTERN_C_END
3326 
3327 /*MC
3328    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3329    block sparse compressed row format.
3330 
3331    Options Database Keys:
3332 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3333 
3334   Level: beginner
3335 
3336 .seealso: MatCreateSeqBAIJ()
3337 M*/
3338 
3339 EXTERN_C_BEGIN
3340 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3341 EXTERN_C_END
3342 
3343 EXTERN_C_BEGIN
3344 #undef __FUNCT__
3345 #define __FUNCT__ "MatCreate_SeqBAIJ"
3346 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3347 {
3348   PetscErrorCode ierr;
3349   PetscMPIInt    size;
3350   Mat_SeqBAIJ    *b;
3351 
3352   PetscFunctionBegin;
3353   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3354   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3355 
3356   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3357   B->data = (void*)b;
3358   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3359   b->row                   = 0;
3360   b->col                   = 0;
3361   b->icol                  = 0;
3362   b->reallocs              = 0;
3363   b->saved_values          = 0;
3364 
3365   b->roworiented           = PETSC_TRUE;
3366   b->nonew                 = 0;
3367   b->diag                  = 0;
3368   b->solve_work            = 0;
3369   b->mult_work             = 0;
3370   B->spptr                 = 0;
3371   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3372   b->keepnonzeropattern    = PETSC_FALSE;
3373   b->xtoy                  = 0;
3374   b->XtoY                  = 0;
3375   B->same_nonzero          = PETSC_FALSE;
3376 
3377   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3378                                      "MatGetFactorAvailable_seqbaij_petsc",
3379                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3380   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3381                                      "MatGetFactor_seqbaij_petsc",
3382                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3383   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3384                                      "MatGetFactor_seqbaij_bstrm",
3385                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3386 #if defined(PETSC_HAVE_MUMPS)
3387   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3388 #endif
3389   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3390                                      "MatInvertBlockDiagonal_SeqBAIJ",
3391                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3392   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3393                                      "MatStoreValues_SeqBAIJ",
3394                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3395   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3396                                      "MatRetrieveValues_SeqBAIJ",
3397                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3398   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3399                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3400                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3401   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3402                                      "MatConvert_SeqBAIJ_SeqAIJ",
3403                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3405                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3406                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3407   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3408                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3409                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3411                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3412                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3414                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3415                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3417                                      "MatIsTranspose_SeqBAIJ",
3418                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3419   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
3422 EXTERN_C_END
3423 
3424 #undef __FUNCT__
3425 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3426 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3427 {
3428   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3429   PetscErrorCode ierr;
3430   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3431 
3432   PetscFunctionBegin;
3433   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3434 
3435   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3436     c->imax = a->imax;
3437     c->ilen = a->ilen;
3438     c->free_imax_ilen = PETSC_FALSE;
3439   } else {
3440     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3441     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3442     for (i=0; i<mbs; i++) {
3443       c->imax[i] = a->imax[i];
3444       c->ilen[i] = a->ilen[i];
3445     }
3446     c->free_imax_ilen = PETSC_TRUE;
3447   }
3448 
3449   /* allocate the matrix space */
3450   if (mallocmatspace){
3451     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3452       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3453       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3454       c->singlemalloc = PETSC_FALSE;
3455       c->free_ij      = PETSC_FALSE;
3456       c->i            = a->i;
3457       c->j            = a->j;
3458       c->parent       = A;
3459       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3460       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3461       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3462     } else {
3463       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3464       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3465       c->singlemalloc = PETSC_TRUE;
3466       c->free_ij      = PETSC_TRUE;
3467       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3468       if (mbs > 0) {
3469 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3470 	if (cpvalues == MAT_COPY_VALUES) {
3471 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3472 	} else {
3473 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3474 	}
3475       }
3476     }
3477   }
3478 
3479   c->roworiented = a->roworiented;
3480   c->nonew       = a->nonew;
3481   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3482   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3483   c->bs2         = a->bs2;
3484   c->mbs         = a->mbs;
3485   c->nbs         = a->nbs;
3486 
3487   if (a->diag) {
3488     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3489       c->diag      = a->diag;
3490       c->free_diag = PETSC_FALSE;
3491     } else {
3492       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3493       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3494       for (i=0; i<mbs; i++) {
3495         c->diag[i] = a->diag[i];
3496       }
3497       c->free_diag = PETSC_TRUE;
3498     }
3499   } else c->diag        = 0;
3500   c->nz                 = a->nz;
3501   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3502   c->solve_work         = 0;
3503   c->mult_work          = 0;
3504   c->free_a             = PETSC_TRUE;
3505   c->free_ij            = PETSC_TRUE;
3506   C->preallocated       = PETSC_TRUE;
3507   C->assembled          = PETSC_TRUE;
3508 
3509   c->compressedrow.use     = a->compressedrow.use;
3510   c->compressedrow.nrows   = a->compressedrow.nrows;
3511   c->compressedrow.check   = a->compressedrow.check;
3512   if (a->compressedrow.use){
3513     i = a->compressedrow.nrows;
3514     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3515     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3516     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3517     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3518   } else {
3519     c->compressedrow.use    = PETSC_FALSE;
3520     c->compressedrow.i      = PETSC_NULL;
3521     c->compressedrow.rindex = PETSC_NULL;
3522   }
3523   C->same_nonzero = A->same_nonzero;
3524   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3525   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3526   PetscFunctionReturn(0);
3527 }
3528 
3529 #undef __FUNCT__
3530 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3531 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3532 {
3533     PetscErrorCode ierr;
3534 
3535   PetscFunctionBegin;
3536   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3537   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3538   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3539   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3540   PetscFunctionReturn(0);
3541 }
3542 
3543 #undef __FUNCT__
3544 #define __FUNCT__ "MatLoad_SeqBAIJ"
3545 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3546 {
3547   Mat_SeqBAIJ    *a;
3548   PetscErrorCode ierr;
3549   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3550   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3551   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3552   PetscInt       *masked,nmask,tmp,bs2,ishift;
3553   PetscMPIInt    size;
3554   int            fd;
3555   PetscScalar    *aa;
3556   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3557 
3558   PetscFunctionBegin;
3559   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3560   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3561   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3562   bs2  = bs*bs;
3563 
3564   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3565   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3566   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3567   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3568   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3569   M = header[1]; N = header[2]; nz = header[3];
3570 
3571   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3572   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3573 
3574   /*
3575      This code adds extra rows to make sure the number of rows is
3576     divisible by the blocksize
3577   */
3578   mbs        = M/bs;
3579   extra_rows = bs - M + bs*(mbs);
3580   if (extra_rows == bs) extra_rows = 0;
3581   else                  mbs++;
3582   if (extra_rows) {
3583     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3584   }
3585 
3586   /* Set global sizes if not already set */
3587   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3588     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3589   } else { /* Check if the matrix global sizes are correct */
3590     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3591     if (rows < 0 && cols < 0){ /* user might provide local size instead of global size */
3592       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3593     }
3594     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3595   }
3596 
3597   /* read in row lengths */
3598   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3599   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3600   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3601 
3602   /* read in column indices */
3603   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3604   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3605   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3606 
3607   /* loop over row lengths determining block row lengths */
3608   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3609   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3610   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3611   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3612   rowcount = 0;
3613   nzcount = 0;
3614   for (i=0; i<mbs; i++) {
3615     nmask = 0;
3616     for (j=0; j<bs; j++) {
3617       kmax = rowlengths[rowcount];
3618       for (k=0; k<kmax; k++) {
3619         tmp = jj[nzcount++]/bs;
3620         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3621       }
3622       rowcount++;
3623     }
3624     browlengths[i] += nmask;
3625     /* zero out the mask elements we set */
3626     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3627   }
3628 
3629   /* Do preallocation  */
3630   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3631   a = (Mat_SeqBAIJ*)newmat->data;
3632 
3633   /* set matrix "i" values */
3634   a->i[0] = 0;
3635   for (i=1; i<= mbs; i++) {
3636     a->i[i]      = a->i[i-1] + browlengths[i-1];
3637     a->ilen[i-1] = browlengths[i-1];
3638   }
3639   a->nz         = 0;
3640   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3641 
3642   /* read in nonzero values */
3643   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3644   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3645   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3646 
3647   /* set "a" and "j" values into matrix */
3648   nzcount = 0; jcount = 0;
3649   for (i=0; i<mbs; i++) {
3650     nzcountb = nzcount;
3651     nmask    = 0;
3652     for (j=0; j<bs; j++) {
3653       kmax = rowlengths[i*bs+j];
3654       for (k=0; k<kmax; k++) {
3655         tmp = jj[nzcount++]/bs;
3656 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3657       }
3658     }
3659     /* sort the masked values */
3660     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3661 
3662     /* set "j" values into matrix */
3663     maskcount = 1;
3664     for (j=0; j<nmask; j++) {
3665       a->j[jcount++]  = masked[j];
3666       mask[masked[j]] = maskcount++;
3667     }
3668     /* set "a" values into matrix */
3669     ishift = bs2*a->i[i];
3670     for (j=0; j<bs; j++) {
3671       kmax = rowlengths[i*bs+j];
3672       for (k=0; k<kmax; k++) {
3673         tmp       = jj[nzcountb]/bs ;
3674         block     = mask[tmp] - 1;
3675         point     = jj[nzcountb] - bs*tmp;
3676         idx       = ishift + bs2*block + j + bs*point;
3677         a->a[idx] = (MatScalar)aa[nzcountb++];
3678       }
3679     }
3680     /* zero out the mask elements we set */
3681     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3682   }
3683   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3684 
3685   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3686   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3687   ierr = PetscFree(aa);CHKERRQ(ierr);
3688   ierr = PetscFree(jj);CHKERRQ(ierr);
3689   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3690 
3691   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3692   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3693   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3694   PetscFunctionReturn(0);
3695 }
3696 
3697 #undef __FUNCT__
3698 #define __FUNCT__ "MatCreateSeqBAIJ"
3699 /*@C
3700    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3701    compressed row) format.  For good matrix assembly performance the
3702    user should preallocate the matrix storage by setting the parameter nz
3703    (or the array nnz).  By setting these parameters accurately, performance
3704    during matrix assembly can be increased by more than a factor of 50.
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  comm - MPI communicator, set to PETSC_COMM_SELF
3710 .  bs - size of block
3711 .  m - number of rows
3712 .  n - number of columns
3713 .  nz - number of nonzero blocks  per block row (same for all rows)
3714 -  nnz - array containing the number of nonzero blocks in the various block rows
3715          (possibly different for each block row) or PETSC_NULL
3716 
3717    Output Parameter:
3718 .  A - the matrix
3719 
3720    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3721    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3722    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3723 
3724    Options Database Keys:
3725 .   -mat_no_unroll - uses code that does not unroll the loops in the
3726                      block calculations (much slower)
3727 .    -mat_block_size - size of the blocks to use
3728 
3729    Level: intermediate
3730 
3731    Notes:
3732    The number of rows and columns must be divisible by blocksize.
3733 
3734    If the nnz parameter is given then the nz parameter is ignored
3735 
3736    A nonzero block is any block that as 1 or more nonzeros in it
3737 
3738    The block AIJ format is fully compatible with standard Fortran 77
3739    storage.  That is, the stored row and column indices can begin at
3740    either one (as in Fortran) or zero.  See the users' manual for details.
3741 
3742    Specify the preallocated storage with either nz or nnz (not both).
3743    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3744    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3745    matrices.
3746 
3747 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3748 @*/
3749 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3750 {
3751   PetscErrorCode ierr;
3752 
3753   PetscFunctionBegin;
3754   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3755   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3756   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3757   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3758   PetscFunctionReturn(0);
3759 }
3760 
3761 #undef __FUNCT__
3762 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3763 /*@C
3764    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3765    per row in the matrix. For good matrix assembly performance the
3766    user should preallocate the matrix storage by setting the parameter nz
3767    (or the array nnz).  By setting these parameters accurately, performance
3768    during matrix assembly can be increased by more than a factor of 50.
3769 
3770    Collective on MPI_Comm
3771 
3772    Input Parameters:
3773 +  A - the matrix
3774 .  bs - size of block
3775 .  nz - number of block nonzeros per block row (same for all rows)
3776 -  nnz - array containing the number of block nonzeros in the various block rows
3777          (possibly different for each block row) or PETSC_NULL
3778 
3779    Options Database Keys:
3780 .   -mat_no_unroll - uses code that does not unroll the loops in the
3781                      block calculations (much slower)
3782 .    -mat_block_size - size of the blocks to use
3783 
3784    Level: intermediate
3785 
3786    Notes:
3787    If the nnz parameter is given then the nz parameter is ignored
3788 
3789    You can call MatGetInfo() to get information on how effective the preallocation was;
3790    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3791    You can also run with the option -info and look for messages with the string
3792    malloc in them to see if additional memory allocation was needed.
3793 
3794    The block AIJ format is fully compatible with standard Fortran 77
3795    storage.  That is, the stored row and column indices can begin at
3796    either one (as in Fortran) or zero.  See the users' manual for details.
3797 
3798    Specify the preallocated storage with either nz or nnz (not both).
3799    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3800    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3801 
3802 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3803 @*/
3804 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3805 {
3806   PetscErrorCode ierr;
3807 
3808   PetscFunctionBegin;
3809   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3810   PetscValidType(B,1);
3811   PetscValidLogicalCollectiveInt(B,bs,2);
3812   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3813   PetscFunctionReturn(0);
3814 }
3815 
3816 #undef __FUNCT__
3817 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3818 /*@C
3819    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3820    (the default sequential PETSc format).
3821 
3822    Collective on MPI_Comm
3823 
3824    Input Parameters:
3825 +  A - the matrix
3826 .  i - the indices into j for the start of each local row (starts with zero)
3827 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3828 -  v - optional values in the matrix
3829 
3830    Level: developer
3831 
3832 .keywords: matrix, aij, compressed row, sparse
3833 
3834 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3835 @*/
3836 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3837 {
3838   PetscErrorCode ierr;
3839 
3840   PetscFunctionBegin;
3841   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3842   PetscValidType(B,1);
3843   PetscValidLogicalCollectiveInt(B,bs,2);
3844   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3845   PetscFunctionReturn(0);
3846 }
3847 
3848 
3849 #undef __FUNCT__
3850 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3851 /*@
3852      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3853 
3854      Collective on MPI_Comm
3855 
3856    Input Parameters:
3857 +  comm - must be an MPI communicator of size 1
3858 .  bs - size of block
3859 .  m - number of rows
3860 .  n - number of columns
3861 .  i - row indices
3862 .  j - column indices
3863 -  a - matrix values
3864 
3865    Output Parameter:
3866 .  mat - the matrix
3867 
3868    Level: advanced
3869 
3870    Notes:
3871        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3872     once the matrix is destroyed
3873 
3874        You cannot set new nonzero locations into this matrix, that will generate an error.
3875 
3876        The i and j indices are 0 based
3877 
3878        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3879 
3880 
3881 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3882 
3883 @*/
3884 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3885 {
3886   PetscErrorCode ierr;
3887   PetscInt       ii;
3888   Mat_SeqBAIJ    *baij;
3889 
3890   PetscFunctionBegin;
3891   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3892   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3893 
3894   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3895   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3896   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3897   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3898   baij = (Mat_SeqBAIJ*)(*mat)->data;
3899   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3900   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3901 
3902   baij->i = i;
3903   baij->j = j;
3904   baij->a = a;
3905   baij->singlemalloc = PETSC_FALSE;
3906   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3907   baij->free_a       = PETSC_FALSE;
3908   baij->free_ij      = PETSC_FALSE;
3909 
3910   for (ii=0; ii<m; ii++) {
3911     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3912 #if defined(PETSC_USE_DEBUG)
3913     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3914 #endif
3915   }
3916 #if defined(PETSC_USE_DEBUG)
3917   for (ii=0; ii<baij->i[m]; ii++) {
3918     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3919     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3920   }
3921 #endif
3922 
3923   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3924   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3925   PetscFunctionReturn(0);
3926 }
3927