xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7b6bb2c608b6fc6714ef38fda02c2dbb91c82665)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 #undef __FUNCT__
11 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
12 /*@
13   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
14 
15   Collective on Mat
16 
17   Input Parameters:
18 . mat - the matrix
19 
20   Level: advanced
21 @*/
22 PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
23 {
24   PetscErrorCode ierr;
25 
26   PetscFunctionBegin;
27   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
28   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
29   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
30   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
31   PetscFunctionReturn(0);
32 }
33 
34 EXTERN_C_BEGIN
35 #undef __FUNCT__
36 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
37 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
38 {
39   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
40   PetscErrorCode ierr;
41   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
42   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
43   PetscReal      shift = 0.0;
44 
45   PetscFunctionBegin;
46   if (a->idiagvalid) PetscFunctionReturn(0);
47   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
48   diag_offset = a->diag;
49   if (!a->idiag) {
50     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
51     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
52   }
53   diag  = a->idiag;
54   mdiag = a->idiag+bs2*mbs;
55   /* factor and invert each block */
56   switch (bs){
57     case 1:
58       for (i=0; i<mbs; i++) {
59         odiag = v + 1*diag_offset[i];
60         diag[0]  = odiag[0];
61         mdiag[0] = odiag[0];
62         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
63         diag    += 1;
64         mdiag   += 1;
65       }
66       break;
67     case 2:
68       for (i=0; i<mbs; i++) {
69         odiag   = v + 4*diag_offset[i];
70         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
71 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
72 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
73 	diag    += 4;
74 	mdiag   += 4;
75       }
76       break;
77     case 3:
78       for (i=0; i<mbs; i++) {
79         odiag    = v + 9*diag_offset[i];
80         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
81         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
82         diag[8]  = odiag[8];
83         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
84         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
85         mdiag[8] = odiag[8];
86 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
87 	diag    += 9;
88 	mdiag   += 9;
89       }
90       break;
91     case 4:
92       for (i=0; i<mbs; i++) {
93         odiag  = v + 16*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
97 	diag  += 16;
98 	mdiag += 16;
99       }
100       break;
101     case 5:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 25*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
107 	diag  += 25;
108 	mdiag += 25;
109       }
110       break;
111     case 6:
112       for (i=0; i<mbs; i++) {
113         odiag = v + 36*diag_offset[i];
114         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
115         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
117 	diag  += 36;
118 	mdiag += 36;
119       }
120       break;
121     case 7:
122       for (i=0; i<mbs; i++) {
123         odiag = v + 49*diag_offset[i];
124         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
125         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
127 	diag  += 49;
128 	mdiag += 49;
129       }
130       break;
131     default:
132       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
133       for (i=0; i<mbs; i++) {
134         odiag = v + bs2*diag_offset[i];
135         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
136         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
138 	diag  += bs2;
139 	mdiag += bs2;
140       }
141       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
142   }
143   a->idiagvalid = PETSC_TRUE;
144   PetscFunctionReturn(0);
145 }
146 EXTERN_C_END
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
150 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
151 {
152   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
153   PetscScalar        *x,x1,s1;
154   const PetscScalar  *b;
155   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
156   PetscErrorCode     ierr;
157   PetscInt           m = a->mbs,i,i2,nz,j;
158   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
159 
160   PetscFunctionBegin;
161   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
162   its = its*lits;
163   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
164   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
165   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
166   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
167   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
168 
169   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
170 
171   diag  = a->diag;
172   idiag = a->idiag;
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
178       x[0] = b[0]*idiag[0];
179       i2     = 1;
180       idiag += 1;
181       for (i=1; i<m; i++) {
182         v     = aa + ai[i];
183         vi    = aj + ai[i];
184         nz    = diag[i] - ai[i];
185         s1    = b[i2];
186         for (j=0; j<nz; j++) {
187           s1 -= v[j]*x[vi[j]];
188         }
189         x[i2]   = idiag[0]*s1;
190         idiag   += 1;
191         i2      += 1;
192       }
193       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
194       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
195     }
196     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
197         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
198       i2    = 0;
199       mdiag = a->idiag+a->mbs;
200       for (i=0; i<m; i++) {
201         x1      = x[i2];
202         x[i2]   = mdiag[0]*x1;
203         mdiag  += 1;
204         i2     += 1;
205       }
206       ierr = PetscLogFlops(m);CHKERRQ(ierr);
207     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
208       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
209     }
210     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
211       idiag   = a->idiag+a->mbs - 1;
212       i2      = m - 1;
213       x1      = x[i2];
214       x[i2]   = idiag[0]*x1;
215       idiag -= 1;
216       i2    -= 1;
217       for (i=m-2; i>=0; i--) {
218         v     = aa + (diag[i]+1);
219         vi    = aj + diag[i] + 1;
220         nz    = ai[i+1] - diag[i] - 1;
221         s1    = x[i2];
222         for (j=0; j<nz; j++) {
223           s1 -= v[j]*x[vi[j]];
224         }
225         x[i2]   = idiag[0]*s1;
226         idiag   -= 1;
227         i2      -= 1;
228       }
229       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
230     }
231   } else {
232     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
233   }
234   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
235   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
236   PetscFunctionReturn(0);
237 }
238 
239 #undef __FUNCT__
240 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
241 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
242 {
243   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
244   PetscScalar        *x,x1,x2,s1,s2;
245   const PetscScalar  *b;
246   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
247   PetscErrorCode     ierr;
248   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
249   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
250 
251   PetscFunctionBegin;
252   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
253   its = its*lits;
254   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
255   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
256   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
257   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
258   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
259 
260   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
261 
262   diag  = a->diag;
263   idiag = a->idiag;
264   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
265   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
266 
267   if (flag & SOR_ZERO_INITIAL_GUESS) {
268     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
269       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
270       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
271       i2     = 2;
272       idiag += 4;
273       for (i=1; i<m; i++) {
274 	v     = aa + 4*ai[i];
275 	vi    = aj + ai[i];
276 	nz    = diag[i] - ai[i];
277 	s1    = b[i2]; s2 = b[i2+1];
278         for (j=0; j<nz; j++) {
279 	  idx  = 2*vi[j];
280           it   = 4*j;
281 	  x1   = x[idx]; x2 = x[1+idx];
282 	  s1  -= v[it]*x1 + v[it+2]*x2;
283 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
284 	}
285 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
286 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
287         idiag   += 4;
288         i2      += 2;
289       }
290       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
291       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
292     }
293     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
294         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
295       i2    = 0;
296       mdiag = a->idiag+4*a->mbs;
297       for (i=0; i<m; i++) {
298         x1      = x[i2]; x2 = x[i2+1];
299         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
300         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
301         mdiag  += 4;
302         i2     += 2;
303       }
304       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
305     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
306       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
307     }
308     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
309       idiag   = a->idiag+4*a->mbs - 4;
310       i2      = 2*m - 2;
311       x1      = x[i2]; x2 = x[i2+1];
312       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
313       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
314       idiag -= 4;
315       i2    -= 2;
316       for (i=m-2; i>=0; i--) {
317 	v     = aa + 4*(diag[i]+1);
318 	vi    = aj + diag[i] + 1;
319 	nz    = ai[i+1] - diag[i] - 1;
320 	s1    = x[i2]; s2 = x[i2+1];
321         for (j=0; j<nz; j++) {
322  	  idx  = 2*vi[j];
323           it   = 4*j;
324 	  x1   = x[idx]; x2 = x[1+idx];
325 	  s1  -= v[it]*x1 + v[it+2]*x2;
326 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
327 	}
328 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
329 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
330         idiag   -= 4;
331         i2      -= 2;
332       }
333       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
334     }
335   } else {
336     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
337   }
338   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
339   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
340   PetscFunctionReturn(0);
341 }
342 
343 #undef __FUNCT__
344 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
345 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
346 {
347   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
348   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
349   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
350   const PetscScalar  *b;
351   PetscErrorCode     ierr;
352   PetscInt           m = a->mbs,i,i2,nz,idx;
353   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
354 
355   PetscFunctionBegin;
356   its = its*lits;
357   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
358   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
359   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
360   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
361   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
362   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
363 
364   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
365 
366   diag  = a->diag;
367   idiag = a->idiag;
368   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
369   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
370 
371   if (flag & SOR_ZERO_INITIAL_GUESS) {
372     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
373       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
374       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
375       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
376       i2     = 3;
377       idiag += 9;
378       for (i=1; i<m; i++) {
379         v     = aa + 9*ai[i];
380         vi    = aj + ai[i];
381         nz    = diag[i] - ai[i];
382         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
383         while (nz--) {
384           idx  = 3*(*vi++);
385           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
386           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
387           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
388           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
389           v   += 9;
390         }
391         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
392         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
393         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
394         idiag   += 9;
395         i2      += 3;
396       }
397       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
398       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
399     }
400     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
401         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
402       i2    = 0;
403       mdiag = a->idiag+9*a->mbs;
404       for (i=0; i<m; i++) {
405         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
406         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
407         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
408         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
409         mdiag  += 9;
410         i2     += 3;
411       }
412       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
413     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
414       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
415     }
416     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
417       idiag   = a->idiag+9*a->mbs - 9;
418       i2      = 3*m - 3;
419       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
420       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
421       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
422       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
423       idiag -= 9;
424       i2    -= 3;
425       for (i=m-2; i>=0; i--) {
426         v     = aa + 9*(diag[i]+1);
427         vi    = aj + diag[i] + 1;
428         nz    = ai[i+1] - diag[i] - 1;
429         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
430         while (nz--) {
431           idx  = 3*(*vi++);
432           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
433           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
434           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
435           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
436           v   += 9;
437         }
438         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
439         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
440         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
441         idiag   -= 9;
442         i2      -= 3;
443       }
444       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
445     }
446   } else {
447     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
448   }
449   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
456 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
457 {
458   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
459   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
460   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
461   const PetscScalar  *b;
462   PetscErrorCode     ierr;
463   PetscInt           m = a->mbs,i,i2,nz,idx;
464   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
465 
466   PetscFunctionBegin;
467   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
468   its = its*lits;
469   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
470   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
471   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
472   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
473   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
474 
475   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
476 
477   diag  = a->diag;
478   idiag = a->idiag;
479   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
480   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
481 
482   if (flag & SOR_ZERO_INITIAL_GUESS) {
483     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
484       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
485       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
486       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
487       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
488       i2     = 4;
489       idiag += 16;
490       for (i=1; i<m; i++) {
491 	v     = aa + 16*ai[i];
492 	vi    = aj + ai[i];
493 	nz    = diag[i] - ai[i];
494 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
495 	while (nz--) {
496 	  idx  = 4*(*vi++);
497 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
498 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
499 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
500 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
501 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
502 	  v   += 16;
503 	}
504 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
505 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
506 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
507 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
508         idiag   += 16;
509         i2      += 4;
510       }
511       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
512       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
513     }
514     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
515         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
516       i2    = 0;
517       mdiag = a->idiag+16*a->mbs;
518       for (i=0; i<m; i++) {
519         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
520         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
521         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
522         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
523         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
524         mdiag  += 16;
525         i2     += 4;
526       }
527       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
528     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
529       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
530     }
531     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
532       idiag   = a->idiag+16*a->mbs - 16;
533       i2      = 4*m - 4;
534       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
535       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
536       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
537       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
538       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
539       idiag -= 16;
540       i2    -= 4;
541       for (i=m-2; i>=0; i--) {
542 	v     = aa + 16*(diag[i]+1);
543 	vi    = aj + diag[i] + 1;
544 	nz    = ai[i+1] - diag[i] - 1;
545 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
546 	while (nz--) {
547 	  idx  = 4*(*vi++);
548 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
549 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
550 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
551 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
552 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
553 	  v   += 16;
554 	}
555 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
556 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
557 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
558 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
559         idiag   -= 16;
560         i2      -= 4;
561       }
562       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
563     }
564   } else {
565     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
566   }
567   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
568   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
569   PetscFunctionReturn(0);
570 }
571 
572 #undef __FUNCT__
573 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
574 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
575 {
576   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
577   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
578   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
579   const PetscScalar  *b;
580   PetscErrorCode     ierr;
581   PetscInt           m = a->mbs,i,i2,nz,idx;
582   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
583 
584   PetscFunctionBegin;
585   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
586   its = its*lits;
587   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
588   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
589   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
590   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
591   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
592 
593   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
594 
595   diag  = a->diag;
596   idiag = a->idiag;
597   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
598   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
599 
600   if (flag & SOR_ZERO_INITIAL_GUESS) {
601     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
602       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
603       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
604       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
605       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
606       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
607       i2     = 5;
608       idiag += 25;
609       for (i=1; i<m; i++) {
610 	v     = aa + 25*ai[i];
611 	vi    = aj + ai[i];
612 	nz    = diag[i] - ai[i];
613 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
614 	while (nz--) {
615 	  idx  = 5*(*vi++);
616 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
617 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
618 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
619 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
620 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
621 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
622 	  v   += 25;
623 	}
624 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
625 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
626 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
627 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
628 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
629         idiag   += 25;
630         i2      += 5;
631       }
632       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
633       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
634     }
635     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
636         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
637       i2    = 0;
638       mdiag = a->idiag+25*a->mbs;
639       for (i=0; i<m; i++) {
640         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
641         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
642         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
643         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
644         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
645         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
646         mdiag  += 25;
647         i2     += 5;
648       }
649       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
650     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
651       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
652     }
653     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
654       idiag   = a->idiag+25*a->mbs - 25;
655       i2      = 5*m - 5;
656       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
657       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
658       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
659       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
660       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
661       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
662       idiag -= 25;
663       i2    -= 5;
664       for (i=m-2; i>=0; i--) {
665 	v     = aa + 25*(diag[i]+1);
666 	vi    = aj + diag[i] + 1;
667 	nz    = ai[i+1] - diag[i] - 1;
668 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
669 	while (nz--) {
670 	  idx  = 5*(*vi++);
671 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
672 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
673 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
674 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
675 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
676 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
677 	  v   += 25;
678 	}
679 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
680 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
681 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
682 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
683 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
684         idiag   -= 25;
685         i2      -= 5;
686       }
687       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
688     }
689   } else {
690     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
691   }
692   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
693   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
694   PetscFunctionReturn(0);
695 }
696 
697 #undef __FUNCT__
698 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
699 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
700 {
701   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
702   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
703   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
704   const PetscScalar  *b;
705   PetscErrorCode     ierr;
706   PetscInt           m = a->mbs,i,i2,nz,idx;
707   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
708 
709   PetscFunctionBegin;
710   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
711   its = its*lits;
712   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
713   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
714   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
715   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
716   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
717 
718   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
719 
720   diag  = a->diag;
721   idiag = a->idiag;
722   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
723   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
724 
725   if (flag & SOR_ZERO_INITIAL_GUESS) {
726     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
727       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
728       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
729       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
730       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
731       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
732       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
733       i2     = 6;
734       idiag += 36;
735       for (i=1; i<m; i++) {
736         v     = aa + 36*ai[i];
737         vi    = aj + ai[i];
738         nz    = diag[i] - ai[i];
739         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
740         while (nz--) {
741           idx  = 6*(*vi++);
742           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
743           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
744           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
745           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
746           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
747           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
748           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
749           v   += 36;
750         }
751         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
752         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
753         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
754         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
755         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
756         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
757         idiag   += 36;
758         i2      += 6;
759       }
760       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
761       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
762     }
763     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
764         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
765       i2    = 0;
766       mdiag = a->idiag+36*a->mbs;
767       for (i=0; i<m; i++) {
768         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
769         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
770         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
771         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
772         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
773         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
774         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
775         mdiag  += 36;
776         i2     += 6;
777       }
778       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
779     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
780       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
781     }
782     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
783       idiag   = a->idiag+36*a->mbs - 36;
784       i2      = 6*m - 6;
785       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
786       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
787       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
788       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
789       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
790       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
791       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
792       idiag -= 36;
793       i2    -= 6;
794       for (i=m-2; i>=0; i--) {
795         v     = aa + 36*(diag[i]+1);
796         vi    = aj + diag[i] + 1;
797         nz    = ai[i+1] - diag[i] - 1;
798         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
799         while (nz--) {
800           idx  = 6*(*vi++);
801           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
802           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
803           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
804           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
805           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
806           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
807           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
808           v   += 36;
809         }
810         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
811         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
812         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
813         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
814         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
815         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
816         idiag   -= 36;
817         i2      -= 6;
818       }
819       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
820     }
821   } else {
822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
823   }
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
831 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
832 {
833   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
834   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
835   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
836   const PetscScalar  *b;
837   PetscErrorCode     ierr;
838   PetscInt           m = a->mbs,i,i2,nz,idx;
839   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
840 
841   PetscFunctionBegin;
842   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
843   its = its*lits;
844   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
845   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
846   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
847   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
848   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
849 
850   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
851 
852   diag  = a->diag;
853   idiag = a->idiag;
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
856 
857   if (flag & SOR_ZERO_INITIAL_GUESS) {
858     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
859       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
860       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
861       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
862       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
863       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
864       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
865       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
866       i2     = 7;
867       idiag += 49;
868       for (i=1; i<m; i++) {
869         v     = aa + 49*ai[i];
870         vi    = aj + ai[i];
871         nz    = diag[i] - ai[i];
872         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
873         while (nz--) {
874           idx  = 7*(*vi++);
875           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
876           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
877           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
878           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
879           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
880           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
881           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
882           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
883           v   += 49;
884         }
885         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
886         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
887         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
888         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
889         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
890         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
891         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
892         idiag   += 49;
893         i2      += 7;
894       }
895       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
896       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
897     }
898     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
899         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
900       i2    = 0;
901       mdiag = a->idiag+49*a->mbs;
902       for (i=0; i<m; i++) {
903         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
904         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
905         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
906         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
907         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
908         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
909         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
910         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
911         mdiag  += 49;
912         i2     += 7;
913       }
914       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
915     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
916       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
917     }
918     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
919       idiag   = a->idiag+49*a->mbs - 49;
920       i2      = 7*m - 7;
921       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
922       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
923       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
924       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
925       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
926       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
927       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
928       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
929       idiag -= 49;
930       i2    -= 7;
931       for (i=m-2; i>=0; i--) {
932         v     = aa + 49*(diag[i]+1);
933         vi    = aj + diag[i] + 1;
934         nz    = ai[i+1] - diag[i] - 1;
935         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
936         while (nz--) {
937           idx  = 7*(*vi++);
938           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
939           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
940           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
941           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
942           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
943           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
944           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
945           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
946           v   += 49;
947         }
948         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
949         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
950         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
951         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
952         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
953         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
954         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
955         idiag   -= 49;
956         i2      -= 7;
957       }
958       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
959     }
960   } else {
961     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
962   }
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
970 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
971 {
972   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
973   PetscScalar        *x,*work,*w,*workt;
974   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
975   const PetscScalar  *b;
976   PetscErrorCode     ierr;
977   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
978   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
979 
980   PetscFunctionBegin;
981   its = its*lits;
982   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
983   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
984   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
985   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
986   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
987   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
988 
989   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
990 
991   diag  = a->diag;
992   idiag = a->idiag;
993   if (!a->mult_work) {
994     k    = PetscMax(A->rmap->n,A->cmap->n);
995     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
996   }
997   work = a->mult_work;
998   if (!a->sor_work) {
999     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1000   }
1001   w = a->sor_work;
1002 
1003   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1004   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1005 
1006   if (flag & SOR_ZERO_INITIAL_GUESS) {
1007     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1008       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1009       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1010       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1011       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1012       i2     = bs;
1013       idiag += bs2;
1014       for (i=1; i<m; i++) {
1015         v     = aa + bs2*ai[i];
1016         vi    = aj + ai[i];
1017         nz    = diag[i] - ai[i];
1018 
1019         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1020 	/* copy all rows of x that are needed into contiguous space */
1021         workt = work;
1022         for (j=0; j<nz; j++) {
1023           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1024           workt += bs;
1025         }
1026         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1027        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1028         while (nz--) {
1029           idx  = N*(*vi++);
1030           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1031           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1032           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1033           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1034           v   += N2;
1035 	  } */
1036 
1037         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1038         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1039         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1040         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1041 
1042         idiag   += bs2;
1043         i2      += bs;
1044       }
1045       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1046       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1047     }
1048     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1049         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1050       i2    = 0;
1051       mdiag = a->idiag+bs2*a->mbs;
1052       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053       for (i=0; i<m; i++) {
1054         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1055         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1056         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1057         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1058         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1059 
1060         mdiag  += bs2;
1061         i2     += bs;
1062       }
1063       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1064     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1065       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1066     }
1067     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1068       idiag   = a->idiag+bs2*a->mbs - bs2;
1069       i2      = bs*m - bs;
1070       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1071       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1072       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1073       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1074       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1075       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1076       idiag -= bs2;
1077       i2    -= bs;
1078       for (i=m-2; i>=0; i--) {
1079         v     = aa + bs2*(diag[i]+1);
1080         vi    = aj + diag[i] + 1;
1081         nz    = ai[i+1] - diag[i] - 1;
1082 
1083         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1084 	/* copy all rows of x that are needed into contiguous space */
1085         workt = work;
1086         for (j=0; j<nz; j++) {
1087           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1088           workt += bs;
1089         }
1090         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1091         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1092         while (nz--) {
1093           idx  = N*(*vi++);
1094           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1095           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1096           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1097           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1098           v   += N2;
1099 	  } */
1100 
1101         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1102         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1103         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1104         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1105         idiag   -= bs2;
1106         i2      -= bs;
1107       }
1108       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1109     }
1110   } else {
1111     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1112   }
1113   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1114   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 /*
1119     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1120 */
1121 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1122 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1123 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1124 #define matsetvaluesblocked4_ matsetvaluesblocked4
1125 #endif
1126 
1127 EXTERN_C_BEGIN
1128 #undef __FUNCT__
1129 #define __FUNCT__ "matsetvaluesblocked4_"
1130 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1131 {
1132   Mat               A = *AA;
1133   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1134   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1135   PetscInt          *ai=a->i,*ailen=a->ilen;
1136   PetscInt          *aj=a->j,stepval,lastcol = -1;
1137   const PetscScalar *value = v;
1138   MatScalar         *ap,*aa = a->a,*bap;
1139 
1140   PetscFunctionBegin;
1141   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1142   stepval = (n-1)*4;
1143   for (k=0; k<m; k++) { /* loop over added rows */
1144     row  = im[k];
1145     rp   = aj + ai[row];
1146     ap   = aa + 16*ai[row];
1147     nrow = ailen[row];
1148     low  = 0;
1149     high = nrow;
1150     for (l=0; l<n; l++) { /* loop over added columns */
1151       col = in[l];
1152       if (col <= lastcol) low = 0; else high = nrow;
1153       lastcol = col;
1154       value = v + k*(stepval+4 + l)*4;
1155       while (high-low > 7) {
1156         t = (low+high)/2;
1157         if (rp[t] > col) high = t;
1158         else             low  = t;
1159       }
1160       for (i=low; i<high; i++) {
1161         if (rp[i] > col) break;
1162         if (rp[i] == col) {
1163           bap  = ap +  16*i;
1164           for (ii=0; ii<4; ii++,value+=stepval) {
1165             for (jj=ii; jj<16; jj+=4) {
1166               bap[jj] += *value++;
1167             }
1168           }
1169           goto noinsert2;
1170         }
1171       }
1172       N = nrow++ - 1;
1173       high++; /* added new column index thus must search to one higher than before */
1174       /* shift up all the later entries in this row */
1175       for (ii=N; ii>=i; ii--) {
1176         rp[ii+1] = rp[ii];
1177         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1178       }
1179       if (N >= i) {
1180         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1181       }
1182       rp[i] = col;
1183       bap   = ap +  16*i;
1184       for (ii=0; ii<4; ii++,value+=stepval) {
1185         for (jj=ii; jj<16; jj+=4) {
1186           bap[jj] = *value++;
1187         }
1188       }
1189       noinsert2:;
1190       low = i;
1191     }
1192     ailen[row] = nrow;
1193   }
1194   PetscFunctionReturnVoid();
1195 }
1196 EXTERN_C_END
1197 
1198 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1199 #define matsetvalues4_ MATSETVALUES4
1200 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1201 #define matsetvalues4_ matsetvalues4
1202 #endif
1203 
1204 EXTERN_C_BEGIN
1205 #undef __FUNCT__
1206 #define __FUNCT__ "MatSetValues4_"
1207 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1208 {
1209   Mat         A = *AA;
1210   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1211   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1212   PetscInt    *ai=a->i,*ailen=a->ilen;
1213   PetscInt    *aj=a->j,brow,bcol;
1214   PetscInt    ridx,cidx,lastcol = -1;
1215   MatScalar   *ap,value,*aa=a->a,*bap;
1216 
1217   PetscFunctionBegin;
1218   for (k=0; k<m; k++) { /* loop over added rows */
1219     row  = im[k]; brow = row/4;
1220     rp   = aj + ai[brow];
1221     ap   = aa + 16*ai[brow];
1222     nrow = ailen[brow];
1223     low  = 0;
1224     high = nrow;
1225     for (l=0; l<n; l++) { /* loop over added columns */
1226       col = in[l]; bcol = col/4;
1227       ridx = row % 4; cidx = col % 4;
1228       value = v[l + k*n];
1229       if (col <= lastcol) low = 0; else high = nrow;
1230       lastcol = col;
1231       while (high-low > 7) {
1232         t = (low+high)/2;
1233         if (rp[t] > bcol) high = t;
1234         else              low  = t;
1235       }
1236       for (i=low; i<high; i++) {
1237         if (rp[i] > bcol) break;
1238         if (rp[i] == bcol) {
1239           bap  = ap +  16*i + 4*cidx + ridx;
1240           *bap += value;
1241           goto noinsert1;
1242         }
1243       }
1244       N = nrow++ - 1;
1245       high++; /* added new column thus must search to one higher than before */
1246       /* shift up all the later entries in this row */
1247       for (ii=N; ii>=i; ii--) {
1248         rp[ii+1] = rp[ii];
1249         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1250       }
1251       if (N>=i) {
1252         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1253       }
1254       rp[i]                    = bcol;
1255       ap[16*i + 4*cidx + ridx] = value;
1256       noinsert1:;
1257       low = i;
1258     }
1259     ailen[brow] = nrow;
1260   }
1261   PetscFunctionReturnVoid();
1262 }
1263 EXTERN_C_END
1264 
1265 /*
1266      Checks for missing diagonals
1267 */
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1270 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1271 {
1272   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1273   PetscErrorCode ierr;
1274   PetscInt       *diag,*jj = a->j,i;
1275 
1276   PetscFunctionBegin;
1277   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1278   *missing = PETSC_FALSE;
1279   if (A->rmap->n > 0 && !jj) {
1280     *missing  = PETSC_TRUE;
1281     if (d) *d = 0;
1282     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1283   } else {
1284     diag     = a->diag;
1285     for (i=0; i<a->mbs; i++) {
1286       if (jj[diag[i]] != i) {
1287         *missing  = PETSC_TRUE;
1288         if (d) *d = i;
1289         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1290       }
1291     }
1292   }
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1298 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1299 {
1300   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302   PetscInt       i,j,m = a->mbs;
1303 
1304   PetscFunctionBegin;
1305   if (!a->diag) {
1306     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1307     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1308     a->free_diag = PETSC_TRUE;
1309   }
1310   for (i=0; i<m; i++) {
1311     a->diag[i] = a->i[i+1];
1312     for (j=a->i[i]; j<a->i[i+1]; j++) {
1313       if (a->j[j] == i) {
1314         a->diag[i] = j;
1315         break;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 
1323 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1327 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1332   PetscInt       *tia, *tja;
1333 
1334   PetscFunctionBegin;
1335   *nn = n;
1336   if (!ia) PetscFunctionReturn(0);
1337   if (symmetric) {
1338     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1339     nz   = tia[n];
1340   } else {
1341     tia = a->i; tja = a->j;
1342   }
1343 
1344   if (!blockcompressed && bs > 1) {
1345     (*nn) *= bs;
1346     /* malloc & create the natural set of indices */
1347     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1348     if (n) {
1349       (*ia)[0] = 0;
1350       for (j=1; j<bs; j++) {
1351         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1352       }
1353     }
1354 
1355     for (i=1; i<n; i++) {
1356       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1357       for (j=1; j<bs; j++) {
1358         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1359       }
1360     }
1361     if (n) {
1362       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1363     }
1364 
1365     if (ja) {
1366       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1367       cnt = 0;
1368       for (i=0; i<n; i++) {
1369         for (j=0; j<bs; j++) {
1370           for (k=tia[i]; k<tia[i+1]; k++) {
1371             for (l=0; l<bs; l++) {
1372               (*ja)[cnt++] = bs*tja[k] + l;
1373 	    }
1374           }
1375         }
1376       }
1377     }
1378 
1379     n     *= bs;
1380     nz *= bs*bs;
1381     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1382       ierr = PetscFree(tia);CHKERRQ(ierr);
1383       ierr = PetscFree(tja);CHKERRQ(ierr);
1384     }
1385   } else if (oshift == 1) {
1386     if (symmetric) {
1387       PetscInt nz = tia[A->rmap->n/bs];
1388       /*  add 1 to i and j indices */
1389       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1390       *ia = tia;
1391       if (ja) {
1392 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1393         *ja = tja;
1394       }
1395     } else {
1396       PetscInt nz = a->i[A->rmap->n/bs];
1397       /* malloc space and  add 1 to i and j indices */
1398       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1399       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1400       if (ja) {
1401 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1402 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1403       }
1404     }
1405   } else {
1406     *ia = tia;
1407     if (ja) *ja = tja;
1408   }
1409 
1410   PetscFunctionReturn(0);
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1415 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1416 {
1417   PetscErrorCode ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!ia) PetscFunctionReturn(0);
1421   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1422     ierr = PetscFree(*ia);CHKERRQ(ierr);
1423     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1430 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434 
1435   PetscFunctionBegin;
1436 #if defined(PETSC_USE_LOG)
1437   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1438 #endif
1439   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1440   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1441   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1442   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1443   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1444   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1445   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1446   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1447   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1448   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1449   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1450   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1451   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1452 
1453   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1454   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1455   ierr = PetscFree(A->data);CHKERRQ(ierr);
1456 
1457   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1458   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1459   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1460   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1461   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1462   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1473 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477 
1478   PetscFunctionBegin;
1479   switch (op) {
1480   case MAT_ROW_ORIENTED:
1481     a->roworiented    = flg;
1482     break;
1483   case MAT_KEEP_NONZERO_PATTERN:
1484     a->keepnonzeropattern = flg;
1485     break;
1486   case MAT_NEW_NONZERO_LOCATIONS:
1487     a->nonew          = (flg ? 0 : 1);
1488     break;
1489   case MAT_NEW_NONZERO_LOCATION_ERR:
1490     a->nonew          = (flg ? -1 : 0);
1491     break;
1492   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1493     a->nonew          = (flg ? -2 : 0);
1494     break;
1495   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1496     a->nounused       = (flg ? -1 : 0);
1497     break;
1498   case MAT_CHECK_COMPRESSED_ROW:
1499     a->compressedrow.check = flg;
1500     break;
1501   case MAT_NEW_DIAGONALS:
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503   case MAT_USE_HASH_TABLE:
1504     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1505     break;
1506   case MAT_SYMMETRIC:
1507   case MAT_STRUCTURALLY_SYMMETRIC:
1508   case MAT_HERMITIAN:
1509   case MAT_SYMMETRY_ETERNAL:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   default:
1513     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscErrorCode ierr;
1524   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1525   MatScalar      *aa,*aa_i;
1526   PetscScalar    *v_i;
1527 
1528   PetscFunctionBegin;
1529   bs  = A->rmap->bs;
1530   ai  = a->i;
1531   aj  = a->j;
1532   aa  = a->a;
1533   bs2 = a->bs2;
1534 
1535   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1536 
1537   bn  = row/bs;   /* Block number */
1538   bp  = row % bs; /* Block Position */
1539   M   = ai[bn+1] - ai[bn];
1540   *nz = bs*M;
1541 
1542   if (v) {
1543     *v = 0;
1544     if (*nz) {
1545       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1546       for (i=0; i<M; i++) { /* for each block in the block row */
1547         v_i  = *v + i*bs;
1548         aa_i = aa + bs2*(ai[bn] + i);
1549         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1550       }
1551     }
1552   }
1553 
1554   if (idx) {
1555     *idx = 0;
1556     if (*nz) {
1557       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1558       for (i=0; i<M; i++) { /* for each block in the block row */
1559         idx_i = *idx + i*bs;
1560         itmp  = bs*aj[ai[bn] + i];
1561         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1562       }
1563     }
1564   }
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1571 {
1572   PetscErrorCode ierr;
1573 
1574   PetscFunctionBegin;
1575   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1576   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1585 {
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1587   Mat            C;
1588   PetscErrorCode ierr;
1589   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1590   PetscInt       *rows,*cols,bs2=a->bs2;
1591   MatScalar      *array;
1592 
1593   PetscFunctionBegin;
1594   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1595   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1596     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1597     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1598 
1599     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1600     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1601     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1602     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1603     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1604     ierr = PetscFree(col);CHKERRQ(ierr);
1605   } else {
1606     C = *B;
1607   }
1608 
1609   array = a->a;
1610   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1611   for (i=0; i<mbs; i++) {
1612     cols[0] = i*bs;
1613     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1614     len = ai[i+1] - ai[i];
1615     for (j=0; j<len; j++) {
1616       rows[0] = (*aj++)*bs;
1617       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1618       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1619       array += bs2;
1620     }
1621   }
1622   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1623 
1624   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1625   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1626 
1627   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1628     *B = C;
1629   } else {
1630     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 EXTERN_C_BEGIN
1636 #undef __FUNCT__
1637 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1638 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1639 {
1640   PetscErrorCode ierr;
1641   Mat            Btrans;
1642 
1643   PetscFunctionBegin;
1644   *f = PETSC_FALSE;
1645   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1646   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1647   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1648   PetscFunctionReturn(0);
1649 }
1650 EXTERN_C_END
1651 
1652 #undef __FUNCT__
1653 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1654 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1655 {
1656   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1657   PetscErrorCode ierr;
1658   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1659   int            fd;
1660   PetscScalar    *aa;
1661   FILE           *file;
1662 
1663   PetscFunctionBegin;
1664   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1665   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1666   col_lens[0] = MAT_FILE_CLASSID;
1667 
1668   col_lens[1] = A->rmap->N;
1669   col_lens[2] = A->cmap->n;
1670   col_lens[3] = a->nz*bs2;
1671 
1672   /* store lengths of each row and write (including header) to file */
1673   count = 0;
1674   for (i=0; i<a->mbs; i++) {
1675     for (j=0; j<bs; j++) {
1676       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1677     }
1678   }
1679   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1680   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1681 
1682   /* store column indices (zero start index) */
1683   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1684   count = 0;
1685   for (i=0; i<a->mbs; i++) {
1686     for (j=0; j<bs; j++) {
1687       for (k=a->i[i]; k<a->i[i+1]; k++) {
1688         for (l=0; l<bs; l++) {
1689           jj[count++] = bs*a->j[k] + l;
1690         }
1691       }
1692     }
1693   }
1694   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1695   ierr = PetscFree(jj);CHKERRQ(ierr);
1696 
1697   /* store nonzero values */
1698   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1699   count = 0;
1700   for (i=0; i<a->mbs; i++) {
1701     for (j=0; j<bs; j++) {
1702       for (k=a->i[i]; k<a->i[i+1]; k++) {
1703         for (l=0; l<bs; l++) {
1704           aa[count++] = a->a[bs2*k + l*bs + j];
1705         }
1706       }
1707     }
1708   }
1709   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1710   ierr = PetscFree(aa);CHKERRQ(ierr);
1711 
1712   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1713   if (file) {
1714     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1715   }
1716   PetscFunctionReturn(0);
1717 }
1718 
1719 #undef __FUNCT__
1720 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1721 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1722 {
1723   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1724   PetscErrorCode    ierr;
1725   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1726   PetscViewerFormat format;
1727 
1728   PetscFunctionBegin;
1729   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1730   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1731     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1732   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1733     Mat aij;
1734     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1735     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1736     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1737   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1738      PetscFunctionReturn(0);
1739   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1740     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1741     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1742     for (i=0; i<a->mbs; i++) {
1743       for (j=0; j<bs; j++) {
1744         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1745         for (k=a->i[i]; k<a->i[i+1]; k++) {
1746           for (l=0; l<bs; l++) {
1747 #if defined(PETSC_USE_COMPLEX)
1748             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1749               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1750                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1751             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1752               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1753                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1754             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1755               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1756             }
1757 #else
1758             if (a->a[bs2*k + l*bs + j] != 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1760             }
1761 #endif
1762           }
1763         }
1764         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1765       }
1766     }
1767     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1768   } else {
1769     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1770     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1771     for (i=0; i<a->mbs; i++) {
1772       for (j=0; j<bs; j++) {
1773         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1774         for (k=a->i[i]; k<a->i[i+1]; k++) {
1775           for (l=0; l<bs; l++) {
1776 #if defined(PETSC_USE_COMPLEX)
1777             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1778               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1779                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1780             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1781               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1782                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1783             } else {
1784               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1785             }
1786 #else
1787             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1788 #endif
1789           }
1790         }
1791         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1792       }
1793     }
1794     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1795   }
1796   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1797   PetscFunctionReturn(0);
1798 }
1799 
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1802 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1803 {
1804   Mat            A = (Mat) Aa;
1805   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1806   PetscErrorCode ierr;
1807   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1808   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1809   MatScalar      *aa;
1810   PetscViewer    viewer;
1811   PetscViewerFormat format;
1812 
1813   PetscFunctionBegin;
1814   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1815   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1816 
1817   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1818 
1819   /* loop over matrix elements drawing boxes */
1820 
1821   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1822     color = PETSC_DRAW_BLUE;
1823     for (i=0,row=0; i<mbs; i++,row+=bs) {
1824       for (j=a->i[i]; j<a->i[i+1]; j++) {
1825         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1826         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1827         aa = a->a + j*bs2;
1828         for (k=0; k<bs; k++) {
1829           for (l=0; l<bs; l++) {
1830             if (PetscRealPart(*aa++) >=  0.) continue;
1831             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1832           }
1833         }
1834       }
1835     }
1836     color = PETSC_DRAW_CYAN;
1837     for (i=0,row=0; i<mbs; i++,row+=bs) {
1838       for (j=a->i[i]; j<a->i[i+1]; j++) {
1839         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1840         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1841         aa = a->a + j*bs2;
1842         for (k=0; k<bs; k++) {
1843           for (l=0; l<bs; l++) {
1844             if (PetscRealPart(*aa++) != 0.) continue;
1845             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1846           }
1847         }
1848       }
1849     }
1850     color = PETSC_DRAW_RED;
1851     for (i=0,row=0; i<mbs; i++,row+=bs) {
1852       for (j=a->i[i]; j<a->i[i+1]; j++) {
1853         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1854         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1855         aa = a->a + j*bs2;
1856         for (k=0; k<bs; k++) {
1857           for (l=0; l<bs; l++) {
1858             if (PetscRealPart(*aa++) <= 0.) continue;
1859             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1860           }
1861         }
1862       }
1863     }
1864   } else {
1865     /* use contour shading to indicate magnitude of values */
1866     /* first determine max of all nonzero values */
1867     PetscDraw   popup;
1868     PetscReal scale,maxv = 0.0;
1869 
1870     for (i=0; i<a->nz*a->bs2; i++) {
1871       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1872     }
1873     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1874     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1875     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1876     for (i=0,row=0; i<mbs; i++,row+=bs) {
1877       for (j=a->i[i]; j<a->i[i+1]; j++) {
1878         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1879         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1880         aa = a->a + j*bs2;
1881         for (k=0; k<bs; k++) {
1882           for (l=0; l<bs; l++) {
1883             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1884             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1885           }
1886         }
1887       }
1888     }
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1895 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1896 {
1897   PetscErrorCode ierr;
1898   PetscReal      xl,yl,xr,yr,w,h;
1899   PetscDraw      draw;
1900   PetscBool      isnull;
1901 
1902   PetscFunctionBegin;
1903 
1904   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1905   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1906 
1907   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1908   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1909   xr += w;    yr += h;  xl = -w;     yl = -h;
1910   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1911   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1912   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1913   PetscFunctionReturn(0);
1914 }
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatView_SeqBAIJ"
1918 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1919 {
1920   PetscErrorCode ierr;
1921   PetscBool      iascii,isbinary,isdraw;
1922 
1923   PetscFunctionBegin;
1924   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1925   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1926   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1927   if (iascii){
1928     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1929   } else if (isbinary) {
1930     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1931   } else if (isdraw) {
1932     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1933   } else {
1934     Mat B;
1935     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1936     ierr = MatView(B,viewer);CHKERRQ(ierr);
1937     ierr = MatDestroy(&B);CHKERRQ(ierr);
1938   }
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1945 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1946 {
1947   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1948   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1949   PetscInt    *ai = a->i,*ailen = a->ilen;
1950   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1951   MatScalar   *ap,*aa = a->a;
1952 
1953   PetscFunctionBegin;
1954   for (k=0; k<m; k++) { /* loop over rows */
1955     row  = im[k]; brow = row/bs;
1956     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1957     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1958     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1959     nrow = ailen[brow];
1960     for (l=0; l<n; l++) { /* loop over columns */
1961       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1962       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1963       col  = in[l] ;
1964       bcol = col/bs;
1965       cidx = col%bs;
1966       ridx = row%bs;
1967       high = nrow;
1968       low  = 0; /* assume unsorted */
1969       while (high-low > 5) {
1970         t = (low+high)/2;
1971         if (rp[t] > bcol) high = t;
1972         else             low  = t;
1973       }
1974       for (i=low; i<high; i++) {
1975         if (rp[i] > bcol) break;
1976         if (rp[i] == bcol) {
1977           *v++ = ap[bs2*i+bs*cidx+ridx];
1978           goto finished;
1979         }
1980       }
1981       *v++ = 0.0;
1982       finished:;
1983     }
1984   }
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1990 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1991 {
1992   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1993   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1994   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1995   PetscErrorCode    ierr;
1996   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1997   PetscBool         roworiented=a->roworiented;
1998   const PetscScalar *value = v;
1999   MatScalar         *ap,*aa = a->a,*bap;
2000 
2001   PetscFunctionBegin;
2002   if (roworiented) {
2003     stepval = (n-1)*bs;
2004   } else {
2005     stepval = (m-1)*bs;
2006   }
2007   for (k=0; k<m; k++) { /* loop over added rows */
2008     row  = im[k];
2009     if (row < 0) continue;
2010 #if defined(PETSC_USE_DEBUG)
2011     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
2012 #endif
2013     rp   = aj + ai[row];
2014     ap   = aa + bs2*ai[row];
2015     rmax = imax[row];
2016     nrow = ailen[row];
2017     low  = 0;
2018     high = nrow;
2019     for (l=0; l<n; l++) { /* loop over added columns */
2020       if (in[l] < 0) continue;
2021 #if defined(PETSC_USE_DEBUG)
2022       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2023 #endif
2024       col = in[l];
2025       if (roworiented) {
2026         value = v + (k*(stepval+bs) + l)*bs;
2027       } else {
2028         value = v + (l*(stepval+bs) + k)*bs;
2029       }
2030       if (col <= lastcol) low = 0; else high = nrow;
2031       lastcol = col;
2032       while (high-low > 7) {
2033         t = (low+high)/2;
2034         if (rp[t] > col) high = t;
2035         else             low  = t;
2036       }
2037       for (i=low; i<high; i++) {
2038         if (rp[i] > col) break;
2039         if (rp[i] == col) {
2040           bap  = ap +  bs2*i;
2041           if (roworiented) {
2042             if (is == ADD_VALUES) {
2043               for (ii=0; ii<bs; ii++,value+=stepval) {
2044                 for (jj=ii; jj<bs2; jj+=bs) {
2045                   bap[jj] += *value++;
2046                 }
2047               }
2048             } else {
2049               for (ii=0; ii<bs; ii++,value+=stepval) {
2050                 for (jj=ii; jj<bs2; jj+=bs) {
2051                   bap[jj] = *value++;
2052                 }
2053               }
2054             }
2055           } else {
2056             if (is == ADD_VALUES) {
2057               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2058                 for (jj=0; jj<bs; jj++) {
2059                   bap[jj] += value[jj];
2060                 }
2061                 bap += bs;
2062               }
2063             } else {
2064               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2065                 for (jj=0; jj<bs; jj++) {
2066                   bap[jj]  = value[jj];
2067                 }
2068                 bap += bs;
2069               }
2070             }
2071           }
2072           goto noinsert2;
2073         }
2074       }
2075       if (nonew == 1) goto noinsert2;
2076       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2077       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2078       N = nrow++ - 1; high++;
2079       /* shift up all the later entries in this row */
2080       for (ii=N; ii>=i; ii--) {
2081         rp[ii+1] = rp[ii];
2082         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2083       }
2084       if (N >= i) {
2085         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2086       }
2087       rp[i] = col;
2088       bap   = ap +  bs2*i;
2089       if (roworiented) {
2090         for (ii=0; ii<bs; ii++,value+=stepval) {
2091           for (jj=ii; jj<bs2; jj+=bs) {
2092             bap[jj] = *value++;
2093           }
2094         }
2095       } else {
2096         for (ii=0; ii<bs; ii++,value+=stepval) {
2097           for (jj=0; jj<bs; jj++) {
2098             *bap++  = *value++;
2099           }
2100         }
2101       }
2102       noinsert2:;
2103       low = i;
2104     }
2105     ailen[row] = nrow;
2106   }
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2112 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2113 {
2114   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2115   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2116   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2117   PetscErrorCode ierr;
2118   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2119   MatScalar      *aa = a->a,*ap;
2120   PetscReal      ratio=0.6;
2121 
2122   PetscFunctionBegin;
2123   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2124 
2125   if (m) rmax = ailen[0];
2126   for (i=1; i<mbs; i++) {
2127     /* move each row back by the amount of empty slots (fshift) before it*/
2128     fshift += imax[i-1] - ailen[i-1];
2129     rmax   = PetscMax(rmax,ailen[i]);
2130     if (fshift) {
2131       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2132       N = ailen[i];
2133       for (j=0; j<N; j++) {
2134         ip[j-fshift] = ip[j];
2135         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2136       }
2137     }
2138     ai[i] = ai[i-1] + ailen[i-1];
2139   }
2140   if (mbs) {
2141     fshift += imax[mbs-1] - ailen[mbs-1];
2142     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2143   }
2144   /* reset ilen and imax for each row */
2145   for (i=0; i<mbs; i++) {
2146     ailen[i] = imax[i] = ai[i+1] - ai[i];
2147   }
2148   a->nz = ai[mbs];
2149 
2150   /* diagonals may have moved, so kill the diagonal pointers */
2151   a->idiagvalid = PETSC_FALSE;
2152   if (fshift && a->diag) {
2153     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2154     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2155     a->diag = 0;
2156   }
2157   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2158   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2159   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2160   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2161   A->info.mallocs     += a->reallocs;
2162   a->reallocs          = 0;
2163   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2164 
2165   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2166   A->same_nonzero = PETSC_TRUE;
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 /*
2171    This function returns an array of flags which indicate the locations of contiguous
2172    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2173    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2174    Assume: sizes should be long enough to hold all the values.
2175 */
2176 #undef __FUNCT__
2177 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2178 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2179 {
2180   PetscInt   i,j,k,row;
2181   PetscBool  flg;
2182 
2183   PetscFunctionBegin;
2184   for (i=0,j=0; i<n; j++) {
2185     row = idx[i];
2186     if (row%bs!=0) { /* Not the begining of a block */
2187       sizes[j] = 1;
2188       i++;
2189     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2190       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2191       i++;
2192     } else { /* Begining of the block, so check if the complete block exists */
2193       flg = PETSC_TRUE;
2194       for (k=1; k<bs; k++) {
2195         if (row+k != idx[i+k]) { /* break in the block */
2196           flg = PETSC_FALSE;
2197           break;
2198         }
2199       }
2200       if (flg) { /* No break in the bs */
2201         sizes[j] = bs;
2202         i+= bs;
2203       } else {
2204         sizes[j] = 1;
2205         i++;
2206       }
2207     }
2208   }
2209   *bs_max = j;
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2215 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2216 {
2217   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2218   PetscErrorCode    ierr;
2219   PetscInt          i,j,k,count,*rows;
2220   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2221   PetscScalar       zero = 0.0;
2222   MatScalar         *aa;
2223   const PetscScalar *xx;
2224   PetscScalar       *bb;
2225 
2226   PetscFunctionBegin;
2227   /* fix right hand side if needed */
2228   if (x && b) {
2229     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2230     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2231     for (i=0; i<is_n; i++) {
2232       bb[is_idx[i]] = diag*xx[is_idx[i]];
2233     }
2234     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2235     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2236   }
2237 
2238   /* Make a copy of the IS and  sort it */
2239   /* allocate memory for rows,sizes */
2240   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2241 
2242   /* copy IS values to rows, and sort them */
2243   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2244   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2245 
2246   if (baij->keepnonzeropattern) {
2247     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2248     bs_max = is_n;
2249     A->same_nonzero = PETSC_TRUE;
2250   } else {
2251     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2252     A->same_nonzero = PETSC_FALSE;
2253   }
2254 
2255   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2256     row   = rows[j];
2257     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2258     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2259     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2260     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2261       if (diag != (PetscScalar)0.0) {
2262         if (baij->ilen[row/bs] > 0) {
2263           baij->ilen[row/bs]       = 1;
2264           baij->j[baij->i[row/bs]] = row/bs;
2265           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2266         }
2267         /* Now insert all the diagonal values for this bs */
2268         for (k=0; k<bs; k++) {
2269           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2270         }
2271       } else { /* (diag == 0.0) */
2272         baij->ilen[row/bs] = 0;
2273       } /* end (diag == 0.0) */
2274     } else { /* (sizes[i] != bs) */
2275 #if defined (PETSC_USE_DEBUG)
2276       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2277 #endif
2278       for (k=0; k<count; k++) {
2279         aa[0] =  zero;
2280         aa    += bs;
2281       }
2282       if (diag != (PetscScalar)0.0) {
2283         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2284       }
2285     }
2286   }
2287 
2288   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2289   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2295 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2296 {
2297   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2298   PetscErrorCode    ierr;
2299   PetscInt          i,j,k,count;
2300   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2301   PetscScalar       zero = 0.0;
2302   MatScalar         *aa;
2303   const PetscScalar *xx;
2304   PetscScalar       *bb;
2305   PetscBool         *zeroed,vecs = PETSC_FALSE;
2306 
2307   PetscFunctionBegin;
2308   /* fix right hand side if needed */
2309   if (x && b) {
2310     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2311     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2312     vecs = PETSC_TRUE;
2313   }
2314   A->same_nonzero = PETSC_TRUE;
2315 
2316   /* zero the columns */
2317   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2318   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2319   for (i=0; i<is_n; i++) {
2320     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2321     zeroed[is_idx[i]] = PETSC_TRUE;
2322   }
2323   for (i=0; i<A->rmap->N; i++) {
2324     if (!zeroed[i]) {
2325       row = i/bs;
2326       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2327         for (k=0; k<bs; k++) {
2328           col = bs*baij->j[j] + k;
2329 	  if (zeroed[col]) {
2330 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2331             if (vecs) bb[i] -= aa[0]*xx[col];
2332             aa[0] = 0.0;
2333           }
2334         }
2335       }
2336     } else if (vecs) bb[i] = diag*xx[i];
2337   }
2338   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2339   if (vecs) {
2340     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2341     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2342   }
2343 
2344   /* zero the rows */
2345   for (i=0; i<is_n; i++) {
2346     row   = is_idx[i];
2347     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2348     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2349     for (k=0; k<count; k++) {
2350       aa[0] =  zero;
2351       aa    += bs;
2352     }
2353     if (diag != (PetscScalar)0.0) {
2354       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2355     }
2356   }
2357   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358   PetscFunctionReturn(0);
2359 }
2360 
2361 #undef __FUNCT__
2362 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2363 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2364 {
2365   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2366   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2367   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2368   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2369   PetscErrorCode ierr;
2370   PetscInt       ridx,cidx,bs2=a->bs2;
2371   PetscBool      roworiented=a->roworiented;
2372   MatScalar      *ap,value,*aa=a->a,*bap;
2373 
2374   PetscFunctionBegin;
2375   if (v) PetscValidScalarPointer(v,6);
2376   for (k=0; k<m; k++) { /* loop over added rows */
2377     row  = im[k];
2378     brow = row/bs;
2379     if (row < 0) continue;
2380 #if defined(PETSC_USE_DEBUG)
2381     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2382 #endif
2383     rp   = aj + ai[brow];
2384     ap   = aa + bs2*ai[brow];
2385     rmax = imax[brow];
2386     nrow = ailen[brow];
2387     low  = 0;
2388     high = nrow;
2389     for (l=0; l<n; l++) { /* loop over added columns */
2390       if (in[l] < 0) continue;
2391 #if defined(PETSC_USE_DEBUG)
2392       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2393 #endif
2394       col = in[l]; bcol = col/bs;
2395       ridx = row % bs; cidx = col % bs;
2396       if (roworiented) {
2397         value = v[l + k*n];
2398       } else {
2399         value = v[k + l*m];
2400       }
2401       if (col <= lastcol) low = 0; else high = nrow;
2402       lastcol = col;
2403       while (high-low > 7) {
2404         t = (low+high)/2;
2405         if (rp[t] > bcol) high = t;
2406         else              low  = t;
2407       }
2408       for (i=low; i<high; i++) {
2409         if (rp[i] > bcol) break;
2410         if (rp[i] == bcol) {
2411           bap  = ap +  bs2*i + bs*cidx + ridx;
2412           if (is == ADD_VALUES) *bap += value;
2413           else                  *bap  = value;
2414           goto noinsert1;
2415         }
2416       }
2417       if (nonew == 1) goto noinsert1;
2418       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2419       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2420       N = nrow++ - 1; high++;
2421       /* shift up all the later entries in this row */
2422       for (ii=N; ii>=i; ii--) {
2423         rp[ii+1] = rp[ii];
2424         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2425       }
2426       if (N>=i) {
2427         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2428       }
2429       rp[i]                      = bcol;
2430       ap[bs2*i + bs*cidx + ridx] = value;
2431       a->nz++;
2432       noinsert1:;
2433       low = i;
2434     }
2435     ailen[brow] = nrow;
2436   }
2437   A->same_nonzero = PETSC_FALSE;
2438   PetscFunctionReturn(0);
2439 }
2440 
2441 #undef __FUNCT__
2442 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2443 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2444 {
2445   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2446   Mat            outA;
2447   PetscErrorCode ierr;
2448   PetscBool      row_identity,col_identity;
2449 
2450   PetscFunctionBegin;
2451   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2452   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2453   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2454   if (!row_identity || !col_identity) {
2455     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2456   }
2457 
2458   outA            = inA;
2459   inA->factortype = MAT_FACTOR_LU;
2460 
2461   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2462 
2463   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2464   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2465   a->row = row;
2466   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2467   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2468   a->col = col;
2469 
2470   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2471   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2472    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2473   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2474 
2475   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2476   if (!a->solve_work) {
2477     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2478     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2479   }
2480   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2481 
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 EXTERN_C_BEGIN
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2488 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2489 {
2490   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2491   PetscInt    i,nz,mbs;
2492 
2493   PetscFunctionBegin;
2494   nz  = baij->maxnz;
2495   mbs = baij->mbs;
2496   for (i=0; i<nz; i++) {
2497     baij->j[i] = indices[i];
2498   }
2499   baij->nz = nz;
2500   for (i=0; i<mbs; i++) {
2501     baij->ilen[i] = baij->imax[i];
2502   }
2503   PetscFunctionReturn(0);
2504 }
2505 EXTERN_C_END
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2509 /*@
2510     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2511        in the matrix.
2512 
2513   Input Parameters:
2514 +  mat - the SeqBAIJ matrix
2515 -  indices - the column indices
2516 
2517   Level: advanced
2518 
2519   Notes:
2520     This can be called if you have precomputed the nonzero structure of the
2521   matrix and want to provide it to the matrix object to improve the performance
2522   of the MatSetValues() operation.
2523 
2524     You MUST have set the correct numbers of nonzeros per row in the call to
2525   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2526 
2527     MUST be called before any calls to MatSetValues();
2528 
2529 @*/
2530 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2531 {
2532   PetscErrorCode ierr;
2533 
2534   PetscFunctionBegin;
2535   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2536   PetscValidPointer(indices,2);
2537   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2538   PetscFunctionReturn(0);
2539 }
2540 
2541 #undef __FUNCT__
2542 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2543 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2544 {
2545   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2546   PetscErrorCode ierr;
2547   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2548   PetscReal      atmp;
2549   PetscScalar    *x,zero = 0.0;
2550   MatScalar      *aa;
2551   PetscInt       ncols,brow,krow,kcol;
2552 
2553   PetscFunctionBegin;
2554   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2555   bs   = A->rmap->bs;
2556   aa   = a->a;
2557   ai   = a->i;
2558   aj   = a->j;
2559   mbs  = a->mbs;
2560 
2561   ierr = VecSet(v,zero);CHKERRQ(ierr);
2562   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2563   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2564   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2565   for (i=0; i<mbs; i++) {
2566     ncols = ai[1] - ai[0]; ai++;
2567     brow  = bs*i;
2568     for (j=0; j<ncols; j++){
2569       for (kcol=0; kcol<bs; kcol++){
2570         for (krow=0; krow<bs; krow++){
2571           atmp = PetscAbsScalar(*aa);aa++;
2572           row = brow + krow;    /* row index */
2573           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2574           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2575         }
2576       }
2577       aj++;
2578     }
2579   }
2580   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatCopy_SeqBAIJ"
2586 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2587 {
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   /* If the two matrices have the same copy implementation, use fast copy. */
2592   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2593     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2594     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2595     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2596 
2597     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2598     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2599     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2600   } else {
2601     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2602   }
2603   PetscFunctionReturn(0);
2604 }
2605 
2606 #undef __FUNCT__
2607 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2608 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2609 {
2610   PetscErrorCode ierr;
2611 
2612   PetscFunctionBegin;
2613   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2614   PetscFunctionReturn(0);
2615 }
2616 
2617 #undef __FUNCT__
2618 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2619 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2620 {
2621   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2622   PetscFunctionBegin;
2623   *array = a->a;
2624   PetscFunctionReturn(0);
2625 }
2626 
2627 #undef __FUNCT__
2628 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2629 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2630 {
2631   PetscFunctionBegin;
2632   PetscFunctionReturn(0);
2633 }
2634 
2635 #undef __FUNCT__
2636 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2637 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2638 {
2639   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2640   PetscErrorCode ierr;
2641   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2642   PetscBLASInt   one=1;
2643 
2644   PetscFunctionBegin;
2645   if (str == SAME_NONZERO_PATTERN) {
2646     PetscScalar alpha = a;
2647     PetscInt bnz = PetscBLASIntCast(x->nz*bs2);
2648     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2649   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2650     if (y->xtoy && y->XtoY != X) {
2651       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2652       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2653     }
2654     if (!y->xtoy) { /* get xtoy */
2655       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2656       y->XtoY = X;
2657       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2658     }
2659     for (i=0; i<x->nz; i++) {
2660       j = 0;
2661       while (j < bs2){
2662         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2663         j++;
2664       }
2665     }
2666     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2667   } else {
2668     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2669   }
2670   PetscFunctionReturn(0);
2671 }
2672 
2673 #undef __FUNCT__
2674 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2675 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2676 {
2677   PetscInt rbs,cbs;
2678   PetscErrorCode ierr;
2679 
2680   PetscFunctionBegin;
2681   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2682   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2683   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2684   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 #undef __FUNCT__
2689 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2690 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2691 {
2692   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2693   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2694   MatScalar      *aa = a->a;
2695 
2696   PetscFunctionBegin;
2697   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2698   PetscFunctionReturn(0);
2699 }
2700 
2701 #undef __FUNCT__
2702 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2703 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2704 {
2705   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2706   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2707   MatScalar      *aa = a->a;
2708 
2709   PetscFunctionBegin;
2710   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2711   PetscFunctionReturn(0);
2712 }
2713 
2714 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2718 /*
2719     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2720 */
2721 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2722 {
2723   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2724   PetscErrorCode ierr;
2725   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2726   PetscInt       nz = a->i[m],row,*jj,mr,col;
2727 
2728   PetscFunctionBegin;
2729   *nn = n;
2730   if (!ia) PetscFunctionReturn(0);
2731   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2732   else {
2733     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2734     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2735     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2736     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2737     jj = a->j;
2738     for (i=0; i<nz; i++) {
2739       collengths[jj[i]]++;
2740     }
2741     cia[0] = oshift;
2742     for (i=0; i<n; i++) {
2743       cia[i+1] = cia[i] + collengths[i];
2744     }
2745     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2746     jj   = a->j;
2747     for (row=0; row<m; row++) {
2748       mr = a->i[row+1] - a->i[row];
2749       for (i=0; i<mr; i++) {
2750         col = *jj++;
2751         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2752       }
2753     }
2754     ierr = PetscFree(collengths);CHKERRQ(ierr);
2755     *ia = cia; *ja = cja;
2756   }
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 #undef __FUNCT__
2761 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2762 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2763 {
2764   PetscErrorCode ierr;
2765 
2766   PetscFunctionBegin;
2767   if (!ia) PetscFunctionReturn(0);
2768   ierr = PetscFree(*ia);CHKERRQ(ierr);
2769   ierr = PetscFree(*ja);CHKERRQ(ierr);
2770   PetscFunctionReturn(0);
2771 }
2772 
2773 #undef __FUNCT__
2774 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2775 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2776 {
2777   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2778   PetscErrorCode ierr;
2779   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2780   PetscScalar    dx,*y,*xx,*w3_array;
2781   PetscScalar    *vscale_array;
2782   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2783   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2784   void           *fctx = coloring->fctx;
2785   PetscBool      flg = PETSC_FALSE;
2786   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2787   Vec            x1_tmp;
2788 
2789   PetscFunctionBegin;
2790   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2791   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2792   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2793   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2794 
2795   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2796   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2797   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2798   if (flg) {
2799     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2800   } else {
2801     PetscBool  assembled;
2802     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2803     if (assembled) {
2804       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2805     }
2806   }
2807 
2808   x1_tmp = x1;
2809   if (!coloring->vscale){
2810     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2811   }
2812 
2813   /*
2814     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2815     coloring->F for the coarser grids from the finest
2816   */
2817   if (coloring->F) {
2818     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2819     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2820     if (m1 != m2) {
2821       coloring->F = 0;
2822       }
2823     }
2824 
2825   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2826     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2827   }
2828   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2829 
2830   /* Set w1 = F(x1) */
2831   if (coloring->F) {
2832     w1          = coloring->F; /* use already computed value of function */
2833     coloring->F = 0;
2834   } else {
2835     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2836     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2837     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2838   }
2839 
2840   if (!coloring->w3) {
2841     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2842     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2843   }
2844   w3 = coloring->w3;
2845 
2846     CHKMEMQ;
2847     /* Compute all the local scale factors, including ghost points */
2848   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2849   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2850   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2851   if (ctype == IS_COLORING_GHOSTED){
2852     col_start = 0; col_end = N;
2853   } else if (ctype == IS_COLORING_GLOBAL){
2854     xx = xx - start;
2855     vscale_array = vscale_array - start;
2856     col_start = start; col_end = N + start;
2857   }    CHKMEMQ;
2858   for (col=col_start; col<col_end; col++){
2859     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2860     if (coloring->htype[0] == 'w') {
2861       dx = 1.0 + unorm;
2862     } else {
2863       dx  = xx[col];
2864     }
2865     if (dx == (PetscScalar)0.0) dx = 1.0;
2866 #if !defined(PETSC_USE_COMPLEX)
2867     if (dx < umin && dx >= 0.0)      dx = umin;
2868     else if (dx < 0.0 && dx > -umin) dx = -umin;
2869 #else
2870     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2871     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2872 #endif
2873     dx               *= epsilon;
2874     vscale_array[col] = (PetscScalar)1.0/dx;
2875   }     CHKMEMQ;
2876   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2877   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2878   if (ctype == IS_COLORING_GLOBAL){
2879     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2880     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2881   }
2882   CHKMEMQ;
2883   if (coloring->vscaleforrow) {
2884     vscaleforrow = coloring->vscaleforrow;
2885   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2886 
2887   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2888   /*
2889     Loop over each color
2890   */
2891   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2892   for (k=0; k<coloring->ncolors; k++) {
2893     coloring->currentcolor = k;
2894     for (i=0; i<bs; i++) {
2895       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2896       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2897       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2898       /*
2899 	Loop over each column associated with color
2900 	adding the perturbation to the vector w3.
2901       */
2902       for (l=0; l<coloring->ncolumns[k]; l++) {
2903 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2904 	if (coloring->htype[0] == 'w') {
2905 	  dx = 1.0 + unorm;
2906 	} else {
2907 	  dx  = xx[col];
2908 	}
2909 	if (dx == (PetscScalar)0.0) dx = 1.0;
2910 #if !defined(PETSC_USE_COMPLEX)
2911 	if (dx < umin && dx >= 0.0)      dx = umin;
2912 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2913 #else
2914 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2915 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2916 #endif
2917 	dx            *= epsilon;
2918 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2919 	w3_array[col] += dx;
2920       }
2921       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2922       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2923 
2924       /*
2925 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2926 	w2 = F(x1 + dx) - F(x1)
2927       */
2928       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2929       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2930       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2931       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2932 
2933       /*
2934 	Loop over rows of vector, putting results into Jacobian matrix
2935       */
2936       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2937       for (l=0; l<coloring->nrows[k]; l++) {
2938 	row    = bs*coloring->rows[k][l];             /* local row index */
2939 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2940         for (j=0; j<bs; j++) {
2941   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2942           srows[j]  = row + start + j;
2943         }
2944 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2945       }
2946       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2947     }
2948   } /* endof for each color */
2949   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2950   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2951   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2952   ierr = PetscFree(srows);CHKERRQ(ierr);
2953 
2954   coloring->currentcolor = -1;
2955   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2956   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2957   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2958   PetscFunctionReturn(0);
2959 }
2960 
2961 /* -------------------------------------------------------------------*/
2962 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2963        MatGetRow_SeqBAIJ,
2964        MatRestoreRow_SeqBAIJ,
2965        MatMult_SeqBAIJ_N,
2966 /* 4*/ MatMultAdd_SeqBAIJ_N,
2967        MatMultTranspose_SeqBAIJ,
2968        MatMultTransposeAdd_SeqBAIJ,
2969        0,
2970        0,
2971        0,
2972 /*10*/ 0,
2973        MatLUFactor_SeqBAIJ,
2974        0,
2975        0,
2976        MatTranspose_SeqBAIJ,
2977 /*15*/ MatGetInfo_SeqBAIJ,
2978        MatEqual_SeqBAIJ,
2979        MatGetDiagonal_SeqBAIJ,
2980        MatDiagonalScale_SeqBAIJ,
2981        MatNorm_SeqBAIJ,
2982 /*20*/ 0,
2983        MatAssemblyEnd_SeqBAIJ,
2984        MatSetOption_SeqBAIJ,
2985        MatZeroEntries_SeqBAIJ,
2986 /*24*/ MatZeroRows_SeqBAIJ,
2987        0,
2988        0,
2989        0,
2990        0,
2991 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2992        0,
2993        0,
2994        MatGetArray_SeqBAIJ,
2995        MatRestoreArray_SeqBAIJ,
2996 /*34*/ MatDuplicate_SeqBAIJ,
2997        0,
2998        0,
2999        MatILUFactor_SeqBAIJ,
3000        0,
3001 /*39*/ MatAXPY_SeqBAIJ,
3002        MatGetSubMatrices_SeqBAIJ,
3003        MatIncreaseOverlap_SeqBAIJ,
3004        MatGetValues_SeqBAIJ,
3005        MatCopy_SeqBAIJ,
3006 /*44*/ 0,
3007        MatScale_SeqBAIJ,
3008        0,
3009        0,
3010        MatZeroRowsColumns_SeqBAIJ,
3011 /*49*/ MatSetBlockSize_SeqBAIJ,
3012        MatGetRowIJ_SeqBAIJ,
3013        MatRestoreRowIJ_SeqBAIJ,
3014        MatGetColumnIJ_SeqBAIJ,
3015        MatRestoreColumnIJ_SeqBAIJ,
3016 /*54*/ MatFDColoringCreate_SeqAIJ,
3017        0,
3018        0,
3019        0,
3020        MatSetValuesBlocked_SeqBAIJ,
3021 /*59*/ MatGetSubMatrix_SeqBAIJ,
3022        MatDestroy_SeqBAIJ,
3023        MatView_SeqBAIJ,
3024        0,
3025        0,
3026 /*64*/ 0,
3027        0,
3028        0,
3029        0,
3030        0,
3031 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
3032        0,
3033        MatConvert_Basic,
3034        0,
3035        0,
3036 /*74*/ 0,
3037        MatFDColoringApply_BAIJ,
3038        0,
3039        0,
3040        0,
3041 /*79*/ 0,
3042        0,
3043        0,
3044        0,
3045        MatLoad_SeqBAIJ,
3046 /*84*/ 0,
3047        0,
3048        0,
3049        0,
3050        0,
3051 /*89*/ 0,
3052        0,
3053        0,
3054        0,
3055        0,
3056 /*94*/ 0,
3057        0,
3058        0,
3059        0,
3060        0,
3061 /*99*/0,
3062        0,
3063        0,
3064        0,
3065        0,
3066 /*104*/0,
3067        MatRealPart_SeqBAIJ,
3068        MatImaginaryPart_SeqBAIJ,
3069        0,
3070        0,
3071 /*109*/0,
3072        0,
3073        0,
3074        0,
3075        MatMissingDiagonal_SeqBAIJ,
3076 /*114*/0,
3077        0,
3078        0,
3079        0,
3080        0,
3081 /*119*/0,
3082        0,
3083        MatMultHermitianTranspose_SeqBAIJ,
3084        MatMultHermitianTransposeAdd_SeqBAIJ,
3085        0,
3086 };
3087 
3088 EXTERN_C_BEGIN
3089 #undef __FUNCT__
3090 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3091 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3092 {
3093   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3094   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3095   PetscErrorCode ierr;
3096 
3097   PetscFunctionBegin;
3098   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3099 
3100   /* allocate space for values if not already there */
3101   if (!aij->saved_values) {
3102     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3103     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3104   }
3105 
3106   /* copy values over */
3107   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3108   PetscFunctionReturn(0);
3109 }
3110 EXTERN_C_END
3111 
3112 EXTERN_C_BEGIN
3113 #undef __FUNCT__
3114 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3115 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3116 {
3117   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3118   PetscErrorCode ierr;
3119   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3120 
3121   PetscFunctionBegin;
3122   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3123   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3124 
3125   /* copy values over */
3126   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3127   PetscFunctionReturn(0);
3128 }
3129 EXTERN_C_END
3130 
3131 EXTERN_C_BEGIN
3132 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3133 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3134 EXTERN_C_END
3135 
3136 EXTERN_C_BEGIN
3137 #undef __FUNCT__
3138 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3139 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3140 {
3141   Mat_SeqBAIJ    *b;
3142   PetscErrorCode ierr;
3143   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3144   PetscBool      flg,skipallocation = PETSC_FALSE;
3145 
3146   PetscFunctionBegin;
3147 
3148   if (nz == MAT_SKIP_ALLOCATION) {
3149     skipallocation = PETSC_TRUE;
3150     nz             = 0;
3151   }
3152 
3153   if (bs < 0) {
3154     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3155       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3156     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3157     bs   = PetscAbs(bs);
3158   }
3159   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3160   bs   = newbs;
3161 
3162   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3163   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3164   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3165   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3166 
3167   B->preallocated = PETSC_TRUE;
3168 
3169   mbs  = B->rmap->n/bs;
3170   nbs  = B->cmap->n/bs;
3171   bs2  = bs*bs;
3172 
3173   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3174 
3175   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3176   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3177   if (nnz) {
3178     for (i=0; i<mbs; i++) {
3179       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3180       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3181     }
3182   }
3183 
3184   b       = (Mat_SeqBAIJ*)B->data;
3185   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3186     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3187   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3188 
3189   if (!flg) {
3190     switch (bs) {
3191     case 1:
3192       B->ops->mult            = MatMult_SeqBAIJ_1;
3193       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3194       B->ops->sor             = MatSOR_SeqBAIJ_1;
3195       break;
3196     case 2:
3197       B->ops->mult            = MatMult_SeqBAIJ_2;
3198       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3199       B->ops->sor             = MatSOR_SeqBAIJ_2;
3200       break;
3201     case 3:
3202       B->ops->mult            = MatMult_SeqBAIJ_3;
3203       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3204       B->ops->sor             = MatSOR_SeqBAIJ_3;
3205       break;
3206     case 4:
3207       B->ops->mult            = MatMult_SeqBAIJ_4;
3208       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3209       B->ops->sor             = MatSOR_SeqBAIJ_4;
3210       break;
3211     case 5:
3212       B->ops->mult            = MatMult_SeqBAIJ_5;
3213       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3214       B->ops->sor             = MatSOR_SeqBAIJ_5;
3215       break;
3216     case 6:
3217       B->ops->mult            = MatMult_SeqBAIJ_6;
3218       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3219       B->ops->sor             = MatSOR_SeqBAIJ_6;
3220       break;
3221     case 7:
3222       B->ops->mult            = MatMult_SeqBAIJ_7;
3223       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3224       B->ops->sor             = MatSOR_SeqBAIJ_7;
3225       break;
3226     case 15:
3227       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3228       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3229       B->ops->sor             = MatSOR_SeqBAIJ_N;
3230       break;
3231     default:
3232       B->ops->mult            = MatMult_SeqBAIJ_N;
3233       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3234       B->ops->sor             = MatSOR_SeqBAIJ_N;
3235       break;
3236     }
3237   }
3238   B->rmap->bs  = bs;
3239   b->mbs       = mbs;
3240   b->nbs       = nbs;
3241   if (!skipallocation) {
3242     if (!b->imax) {
3243       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3244       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3245       b->free_imax_ilen = PETSC_TRUE;
3246     }
3247     /* b->ilen will count nonzeros in each block row so far. */
3248     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3249     if (!nnz) {
3250       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3251       else if (nz < 0) nz = 1;
3252       for (i=0; i<mbs; i++) b->imax[i] = nz;
3253       nz = nz*mbs;
3254     } else {
3255       nz = 0;
3256       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3257     }
3258 
3259     /* allocate the matrix space */
3260     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3261     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3262     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3263     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3264     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3265     b->singlemalloc = PETSC_TRUE;
3266     b->i[0] = 0;
3267     for (i=1; i<mbs+1; i++) {
3268       b->i[i] = b->i[i-1] + b->imax[i-1];
3269     }
3270     b->free_a     = PETSC_TRUE;
3271     b->free_ij    = PETSC_TRUE;
3272   } else {
3273     b->free_a     = PETSC_FALSE;
3274     b->free_ij    = PETSC_FALSE;
3275   }
3276 
3277   B->rmap->bs          = bs;
3278   b->bs2              = bs2;
3279   b->mbs              = mbs;
3280   b->nz               = 0;
3281   b->maxnz            = nz;
3282   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3283   PetscFunctionReturn(0);
3284 }
3285 EXTERN_C_END
3286 
3287 EXTERN_C_BEGIN
3288 #undef __FUNCT__
3289 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3290 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3291 {
3292   PetscInt       i,m,nz,nz_max=0,*nnz;
3293   PetscScalar    *values=0;
3294   PetscErrorCode ierr;
3295 
3296   PetscFunctionBegin;
3297   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3298   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3299   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3300   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3301   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3302   m = B->rmap->n/bs;
3303 
3304   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3305   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3306   for(i=0; i<m; i++) {
3307     nz = ii[i+1]- ii[i];
3308     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3309     nz_max = PetscMax(nz_max, nz);
3310     nnz[i] = nz;
3311   }
3312   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3313   ierr = PetscFree(nnz);CHKERRQ(ierr);
3314 
3315   values = (PetscScalar*)V;
3316   if (!values) {
3317     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3318     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3319   }
3320   for (i=0; i<m; i++) {
3321     PetscInt          ncols  = ii[i+1] - ii[i];
3322     const PetscInt    *icols = jj + ii[i];
3323     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3324     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3325   }
3326   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3327   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329 
3330   PetscFunctionReturn(0);
3331 }
3332 EXTERN_C_END
3333 
3334 
3335 EXTERN_C_BEGIN
3336 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3337 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3338 #if defined(PETSC_HAVE_MUMPS)
3339 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3340 #endif
3341 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3342 EXTERN_C_END
3343 
3344 /*MC
3345    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3346    block sparse compressed row format.
3347 
3348    Options Database Keys:
3349 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3350 
3351   Level: beginner
3352 
3353 .seealso: MatCreateSeqBAIJ()
3354 M*/
3355 
3356 EXTERN_C_BEGIN
3357 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3358 EXTERN_C_END
3359 
3360 EXTERN_C_BEGIN
3361 #undef __FUNCT__
3362 #define __FUNCT__ "MatCreate_SeqBAIJ"
3363 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3364 {
3365   PetscErrorCode ierr;
3366   PetscMPIInt    size;
3367   Mat_SeqBAIJ    *b;
3368 
3369   PetscFunctionBegin;
3370   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3371   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3372 
3373   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3374   B->data = (void*)b;
3375   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3376   b->row                   = 0;
3377   b->col                   = 0;
3378   b->icol                  = 0;
3379   b->reallocs              = 0;
3380   b->saved_values          = 0;
3381 
3382   b->roworiented           = PETSC_TRUE;
3383   b->nonew                 = 0;
3384   b->diag                  = 0;
3385   b->solve_work            = 0;
3386   b->mult_work             = 0;
3387   B->spptr                 = 0;
3388   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3389   b->keepnonzeropattern    = PETSC_FALSE;
3390   b->xtoy                  = 0;
3391   b->XtoY                  = 0;
3392   B->same_nonzero          = PETSC_FALSE;
3393 
3394   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3395                                      "MatGetFactorAvailable_seqbaij_petsc",
3396                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3397   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3398                                      "MatGetFactor_seqbaij_petsc",
3399                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3401                                      "MatGetFactor_seqbaij_bstrm",
3402                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3403 #if defined(PETSC_HAVE_MUMPS)
3404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3405 #endif
3406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3407                                      "MatInvertBlockDiagonal_SeqBAIJ",
3408                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3410                                      "MatStoreValues_SeqBAIJ",
3411                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3413                                      "MatRetrieveValues_SeqBAIJ",
3414                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3416                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3417                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3419                                      "MatConvert_SeqBAIJ_SeqAIJ",
3420                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3421   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3422                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3423                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3424   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3425                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3426                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3427   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3428                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3429                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3430   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3431                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3432                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3433   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3434                                      "MatIsTranspose_SeqBAIJ",
3435                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3436   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3437   PetscFunctionReturn(0);
3438 }
3439 EXTERN_C_END
3440 
3441 #undef __FUNCT__
3442 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3443 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3444 {
3445   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3446   PetscErrorCode ierr;
3447   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3448 
3449   PetscFunctionBegin;
3450   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3451 
3452   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3453     c->imax = a->imax;
3454     c->ilen = a->ilen;
3455     c->free_imax_ilen = PETSC_FALSE;
3456   } else {
3457     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3458     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3459     for (i=0; i<mbs; i++) {
3460       c->imax[i] = a->imax[i];
3461       c->ilen[i] = a->ilen[i];
3462     }
3463     c->free_imax_ilen = PETSC_TRUE;
3464   }
3465 
3466   /* allocate the matrix space */
3467   if (mallocmatspace){
3468     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3469       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3470       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3471       c->singlemalloc = PETSC_FALSE;
3472       c->free_ij      = PETSC_FALSE;
3473       c->i            = a->i;
3474       c->j            = a->j;
3475       c->parent       = A;
3476       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3477       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3478       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3479     } else {
3480       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3481       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3482       c->singlemalloc = PETSC_TRUE;
3483       c->free_ij      = PETSC_TRUE;
3484       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3485       if (mbs > 0) {
3486 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3487 	if (cpvalues == MAT_COPY_VALUES) {
3488 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3489 	} else {
3490 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3491 	}
3492       }
3493     }
3494   }
3495 
3496   c->roworiented = a->roworiented;
3497   c->nonew       = a->nonew;
3498   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3499   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3500   c->bs2         = a->bs2;
3501   c->mbs         = a->mbs;
3502   c->nbs         = a->nbs;
3503 
3504   if (a->diag) {
3505     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3506       c->diag      = a->diag;
3507       c->free_diag = PETSC_FALSE;
3508     } else {
3509       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3510       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3511       for (i=0; i<mbs; i++) {
3512         c->diag[i] = a->diag[i];
3513       }
3514       c->free_diag = PETSC_TRUE;
3515     }
3516   } else c->diag        = 0;
3517   c->nz                 = a->nz;
3518   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3519   c->solve_work         = 0;
3520   c->mult_work          = 0;
3521   c->free_a             = PETSC_TRUE;
3522   c->free_ij            = PETSC_TRUE;
3523   C->preallocated       = PETSC_TRUE;
3524   C->assembled          = PETSC_TRUE;
3525 
3526   c->compressedrow.use     = a->compressedrow.use;
3527   c->compressedrow.nrows   = a->compressedrow.nrows;
3528   c->compressedrow.check   = a->compressedrow.check;
3529   if (a->compressedrow.use){
3530     i = a->compressedrow.nrows;
3531     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3532     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3533     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3534     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3535   } else {
3536     c->compressedrow.use    = PETSC_FALSE;
3537     c->compressedrow.i      = PETSC_NULL;
3538     c->compressedrow.rindex = PETSC_NULL;
3539   }
3540   C->same_nonzero = A->same_nonzero;
3541   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3542   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3543   PetscFunctionReturn(0);
3544 }
3545 
3546 #undef __FUNCT__
3547 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3548 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3549 {
3550     PetscErrorCode ierr;
3551 
3552   PetscFunctionBegin;
3553   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3554   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3555   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3556   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3557   PetscFunctionReturn(0);
3558 }
3559 
3560 #undef __FUNCT__
3561 #define __FUNCT__ "MatLoad_SeqBAIJ"
3562 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3563 {
3564   Mat_SeqBAIJ    *a;
3565   PetscErrorCode ierr;
3566   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3567   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3568   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3569   PetscInt       *masked,nmask,tmp,bs2,ishift;
3570   PetscMPIInt    size;
3571   int            fd;
3572   PetscScalar    *aa;
3573   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3574 
3575   PetscFunctionBegin;
3576   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3577   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3578   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3579   bs2  = bs*bs;
3580 
3581   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3582   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3583   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3584   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3585   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3586   M = header[1]; N = header[2]; nz = header[3];
3587 
3588   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3589   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3590 
3591   /*
3592      This code adds extra rows to make sure the number of rows is
3593     divisible by the blocksize
3594   */
3595   mbs        = M/bs;
3596   extra_rows = bs - M + bs*(mbs);
3597   if (extra_rows == bs) extra_rows = 0;
3598   else                  mbs++;
3599   if (extra_rows) {
3600     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3601   }
3602 
3603   /* Set global sizes if not already set */
3604   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3605     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3606   } else { /* Check if the matrix global sizes are correct */
3607     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3608     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3609   }
3610 
3611   /* read in row lengths */
3612   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3613   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3614   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3615 
3616   /* read in column indices */
3617   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3618   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3619   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3620 
3621   /* loop over row lengths determining block row lengths */
3622   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3623   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3624   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3625   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3626   rowcount = 0;
3627   nzcount = 0;
3628   for (i=0; i<mbs; i++) {
3629     nmask = 0;
3630     for (j=0; j<bs; j++) {
3631       kmax = rowlengths[rowcount];
3632       for (k=0; k<kmax; k++) {
3633         tmp = jj[nzcount++]/bs;
3634         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3635       }
3636       rowcount++;
3637     }
3638     browlengths[i] += nmask;
3639     /* zero out the mask elements we set */
3640     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3641   }
3642 
3643   /* Do preallocation  */
3644   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3645   a = (Mat_SeqBAIJ*)newmat->data;
3646 
3647   /* set matrix "i" values */
3648   a->i[0] = 0;
3649   for (i=1; i<= mbs; i++) {
3650     a->i[i]      = a->i[i-1] + browlengths[i-1];
3651     a->ilen[i-1] = browlengths[i-1];
3652   }
3653   a->nz         = 0;
3654   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3655 
3656   /* read in nonzero values */
3657   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3658   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3659   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3660 
3661   /* set "a" and "j" values into matrix */
3662   nzcount = 0; jcount = 0;
3663   for (i=0; i<mbs; i++) {
3664     nzcountb = nzcount;
3665     nmask    = 0;
3666     for (j=0; j<bs; j++) {
3667       kmax = rowlengths[i*bs+j];
3668       for (k=0; k<kmax; k++) {
3669         tmp = jj[nzcount++]/bs;
3670 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3671       }
3672     }
3673     /* sort the masked values */
3674     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3675 
3676     /* set "j" values into matrix */
3677     maskcount = 1;
3678     for (j=0; j<nmask; j++) {
3679       a->j[jcount++]  = masked[j];
3680       mask[masked[j]] = maskcount++;
3681     }
3682     /* set "a" values into matrix */
3683     ishift = bs2*a->i[i];
3684     for (j=0; j<bs; j++) {
3685       kmax = rowlengths[i*bs+j];
3686       for (k=0; k<kmax; k++) {
3687         tmp       = jj[nzcountb]/bs ;
3688         block     = mask[tmp] - 1;
3689         point     = jj[nzcountb] - bs*tmp;
3690         idx       = ishift + bs2*block + j + bs*point;
3691         a->a[idx] = (MatScalar)aa[nzcountb++];
3692       }
3693     }
3694     /* zero out the mask elements we set */
3695     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3696   }
3697   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3698 
3699   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3700   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3701   ierr = PetscFree(aa);CHKERRQ(ierr);
3702   ierr = PetscFree(jj);CHKERRQ(ierr);
3703   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3704 
3705   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3706   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3707   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3708   PetscFunctionReturn(0);
3709 }
3710 
3711 #undef __FUNCT__
3712 #define __FUNCT__ "MatCreateSeqBAIJ"
3713 /*@C
3714    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3715    compressed row) format.  For good matrix assembly performance the
3716    user should preallocate the matrix storage by setting the parameter nz
3717    (or the array nnz).  By setting these parameters accurately, performance
3718    during matrix assembly can be increased by more than a factor of 50.
3719 
3720    Collective on MPI_Comm
3721 
3722    Input Parameters:
3723 +  comm - MPI communicator, set to PETSC_COMM_SELF
3724 .  bs - size of block
3725 .  m - number of rows
3726 .  n - number of columns
3727 .  nz - number of nonzero blocks  per block row (same for all rows)
3728 -  nnz - array containing the number of nonzero blocks in the various block rows
3729          (possibly different for each block row) or PETSC_NULL
3730 
3731    Output Parameter:
3732 .  A - the matrix
3733 
3734    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3735    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3736    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3737 
3738    Options Database Keys:
3739 .   -mat_no_unroll - uses code that does not unroll the loops in the
3740                      block calculations (much slower)
3741 .    -mat_block_size - size of the blocks to use
3742 
3743    Level: intermediate
3744 
3745    Notes:
3746    The number of rows and columns must be divisible by blocksize.
3747 
3748    If the nnz parameter is given then the nz parameter is ignored
3749 
3750    A nonzero block is any block that as 1 or more nonzeros in it
3751 
3752    The block AIJ format is fully compatible with standard Fortran 77
3753    storage.  That is, the stored row and column indices can begin at
3754    either one (as in Fortran) or zero.  See the users' manual for details.
3755 
3756    Specify the preallocated storage with either nz or nnz (not both).
3757    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3758    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3759    matrices.
3760 
3761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3762 @*/
3763 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3764 {
3765   PetscErrorCode ierr;
3766 
3767   PetscFunctionBegin;
3768   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3769   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3770   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3771   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3777 /*@C
3778    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3779    per row in the matrix. For good matrix assembly performance the
3780    user should preallocate the matrix storage by setting the parameter nz
3781    (or the array nnz).  By setting these parameters accurately, performance
3782    during matrix assembly can be increased by more than a factor of 50.
3783 
3784    Collective on MPI_Comm
3785 
3786    Input Parameters:
3787 +  A - the matrix
3788 .  bs - size of block
3789 .  nz - number of block nonzeros per block row (same for all rows)
3790 -  nnz - array containing the number of block nonzeros in the various block rows
3791          (possibly different for each block row) or PETSC_NULL
3792 
3793    Options Database Keys:
3794 .   -mat_no_unroll - uses code that does not unroll the loops in the
3795                      block calculations (much slower)
3796 .    -mat_block_size - size of the blocks to use
3797 
3798    Level: intermediate
3799 
3800    Notes:
3801    If the nnz parameter is given then the nz parameter is ignored
3802 
3803    You can call MatGetInfo() to get information on how effective the preallocation was;
3804    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3805    You can also run with the option -info and look for messages with the string
3806    malloc in them to see if additional memory allocation was needed.
3807 
3808    The block AIJ format is fully compatible with standard Fortran 77
3809    storage.  That is, the stored row and column indices can begin at
3810    either one (as in Fortran) or zero.  See the users' manual for details.
3811 
3812    Specify the preallocated storage with either nz or nnz (not both).
3813    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3814    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3815 
3816 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3817 @*/
3818 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3819 {
3820   PetscErrorCode ierr;
3821 
3822   PetscFunctionBegin;
3823   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3824   PetscFunctionReturn(0);
3825 }
3826 
3827 #undef __FUNCT__
3828 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3829 /*@C
3830    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3831    (the default sequential PETSc format).
3832 
3833    Collective on MPI_Comm
3834 
3835    Input Parameters:
3836 +  A - the matrix
3837 .  i - the indices into j for the start of each local row (starts with zero)
3838 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3839 -  v - optional values in the matrix
3840 
3841    Level: developer
3842 
3843 .keywords: matrix, aij, compressed row, sparse
3844 
3845 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3846 @*/
3847 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3848 {
3849   PetscErrorCode ierr;
3850 
3851   PetscFunctionBegin;
3852   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3853   PetscFunctionReturn(0);
3854 }
3855 
3856 
3857 #undef __FUNCT__
3858 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3859 /*@
3860      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3861 
3862      Collective on MPI_Comm
3863 
3864    Input Parameters:
3865 +  comm - must be an MPI communicator of size 1
3866 .  bs - size of block
3867 .  m - number of rows
3868 .  n - number of columns
3869 .  i - row indices
3870 .  j - column indices
3871 -  a - matrix values
3872 
3873    Output Parameter:
3874 .  mat - the matrix
3875 
3876    Level: advanced
3877 
3878    Notes:
3879        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3880     once the matrix is destroyed
3881 
3882        You cannot set new nonzero locations into this matrix, that will generate an error.
3883 
3884        The i and j indices are 0 based
3885 
3886        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3887 
3888 
3889 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3890 
3891 @*/
3892 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3893 {
3894   PetscErrorCode ierr;
3895   PetscInt       ii;
3896   Mat_SeqBAIJ    *baij;
3897 
3898   PetscFunctionBegin;
3899   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3900   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3901 
3902   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3903   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3904   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3905   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3906   baij = (Mat_SeqBAIJ*)(*mat)->data;
3907   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3908   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3909 
3910   baij->i = i;
3911   baij->j = j;
3912   baij->a = a;
3913   baij->singlemalloc = PETSC_FALSE;
3914   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3915   baij->free_a       = PETSC_FALSE;
3916   baij->free_ij      = PETSC_FALSE;
3917 
3918   for (ii=0; ii<m; ii++) {
3919     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3920 #if defined(PETSC_USE_DEBUG)
3921     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3922 #endif
3923   }
3924 #if defined(PETSC_USE_DEBUG)
3925   for (ii=0; ii<baij->i[m]; ii++) {
3926     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3927     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3928   }
3929 #endif
3930 
3931   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3932   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3933   PetscFunctionReturn(0);
3934 }
3935