xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 01a79839fc82a7dabb7a87cd2a8bb532c6bfa88d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode  MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 EXTERN_C_BEGIN
36 #undef __FUNCT__
37 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
38 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A)
39 {
40   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
41   PetscErrorCode ierr;
42   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
43   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
44   PetscReal      shift = 0.0;
45 
46   PetscFunctionBegin;
47   if (a->idiagvalid) PetscFunctionReturn(0);
48   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
49   diag_offset = a->diag;
50   if (!a->idiag) {
51     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
52     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
53   }
54   diag  = a->idiag;
55   mdiag = a->idiag+bs2*mbs;
56   /* factor and invert each block */
57   switch (bs){
58     case 1:
59       for (i=0; i<mbs; i++) {
60         odiag = v + 1*diag_offset[i];
61         diag[0]  = odiag[0];
62         mdiag[0] = odiag[0];
63         diag[0]  = 1.0 / (diag[0] + shift);
64         diag    += 1;
65         mdiag   += 1;
66       }
67       break;
68     case 2:
69       for (i=0; i<mbs; i++) {
70         odiag   = v + 4*diag_offset[i];
71         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
72 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
73 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
74 	diag    += 4;
75 	mdiag   += 4;
76       }
77       break;
78     case 3:
79       for (i=0; i<mbs; i++) {
80         odiag    = v + 9*diag_offset[i];
81         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
82         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
83         diag[8]  = odiag[8];
84         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
85         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
86         mdiag[8] = odiag[8];
87 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
88 	diag    += 9;
89 	mdiag   += 9;
90       }
91       break;
92     case 4:
93       for (i=0; i<mbs; i++) {
94         odiag  = v + 16*diag_offset[i];
95         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
97 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
98 	diag  += 16;
99 	mdiag += 16;
100       }
101       break;
102     case 5:
103       for (i=0; i<mbs; i++) {
104         odiag = v + 25*diag_offset[i];
105         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
107 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
108 	diag  += 25;
109 	mdiag += 25;
110       }
111       break;
112     case 6:
113       for (i=0; i<mbs; i++) {
114         odiag = v + 36*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
117 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
118 	diag  += 36;
119 	mdiag += 36;
120       }
121       break;
122     case 7:
123       for (i=0; i<mbs; i++) {
124         odiag = v + 49*diag_offset[i];
125         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
127 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
128 	diag  += 49;
129 	mdiag += 49;
130       }
131       break;
132     default:
133       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
134       for (i=0; i<mbs; i++) {
135         odiag = v + bs2*diag_offset[i];
136         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
139 	diag  += bs2;
140 	mdiag += bs2;
141       }
142       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
143   }
144   a->idiagvalid = PETSC_TRUE;
145   PetscFunctionReturn(0);
146 }
147 EXTERN_C_END
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
151 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
152 {
153   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
154   PetscScalar        *x,x1,s1;
155   const PetscScalar  *b;
156   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
157   PetscErrorCode     ierr;
158   PetscInt           m = a->mbs,i,i2,nz,j;
159   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
160 
161   PetscFunctionBegin;
162   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
163   its = its*lits;
164   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
169 
170   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
171 
172   diag  = a->diag;
173   idiag = a->idiag;
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
179       x[0] = b[0]*idiag[0];
180       i2     = 1;
181       idiag += 1;
182       for (i=1; i<m; i++) {
183         v     = aa + ai[i];
184         vi    = aj + ai[i];
185         nz    = diag[i] - ai[i];
186         s1    = b[i2];
187         for (j=0; j<nz; j++) {
188           s1 -= v[j]*x[vi[j]];
189         }
190         x[i2]   = idiag[0]*s1;
191         idiag   += 1;
192         i2      += 1;
193       }
194       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
195       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
196     }
197     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
198         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
199       i2    = 0;
200       mdiag = a->idiag+a->mbs;
201       for (i=0; i<m; i++) {
202         x1      = x[i2];
203         x[i2]   = mdiag[0]*x1;
204         mdiag  += 1;
205         i2     += 1;
206       }
207       ierr = PetscLogFlops(m);CHKERRQ(ierr);
208     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
209       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
210     }
211     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
212       idiag   = a->idiag+a->mbs - 1;
213       i2      = m - 1;
214       x1      = x[i2];
215       x[i2]   = idiag[0]*x1;
216       idiag -= 1;
217       i2    -= 1;
218       for (i=m-2; i>=0; i--) {
219         v     = aa + (diag[i]+1);
220         vi    = aj + diag[i] + 1;
221         nz    = ai[i+1] - diag[i] - 1;
222         s1    = x[i2];
223         for (j=0; j<nz; j++) {
224           s1 -= v[j]*x[vi[j]];
225         }
226         x[i2]   = idiag[0]*s1;
227         idiag   -= 1;
228         i2      -= 1;
229       }
230       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
231     }
232   } else {
233     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
234   }
235   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
236   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
242 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
243 {
244   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
245   PetscScalar        *x,x1,x2,s1,s2;
246   const PetscScalar  *b;
247   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
248   PetscErrorCode     ierr;
249   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
250   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
251 
252   PetscFunctionBegin;
253   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
254   its = its*lits;
255   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
256   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
257   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
258   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
259   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
260 
261   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
262 
263   diag  = a->diag;
264   idiag = a->idiag;
265   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
266   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
267 
268   if (flag & SOR_ZERO_INITIAL_GUESS) {
269     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
270       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
271       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
272       i2     = 2;
273       idiag += 4;
274       for (i=1; i<m; i++) {
275 	v     = aa + 4*ai[i];
276 	vi    = aj + ai[i];
277 	nz    = diag[i] - ai[i];
278 	s1    = b[i2]; s2 = b[i2+1];
279         for (j=0; j<nz; j++) {
280 	  idx  = 2*vi[j];
281           it   = 4*j;
282 	  x1   = x[idx]; x2 = x[1+idx];
283 	  s1  -= v[it]*x1 + v[it+2]*x2;
284 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
285 	}
286 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
287 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
288         idiag   += 4;
289         i2      += 2;
290       }
291       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
292       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
293     }
294     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
295         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
296       i2    = 0;
297       mdiag = a->idiag+4*a->mbs;
298       for (i=0; i<m; i++) {
299         x1      = x[i2]; x2 = x[i2+1];
300         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
301         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
302         mdiag  += 4;
303         i2     += 2;
304       }
305       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
306     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
307       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
308     }
309     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
310       idiag   = a->idiag+4*a->mbs - 4;
311       i2      = 2*m - 2;
312       x1      = x[i2]; x2 = x[i2+1];
313       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
314       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
315       idiag -= 4;
316       i2    -= 2;
317       for (i=m-2; i>=0; i--) {
318 	v     = aa + 4*(diag[i]+1);
319 	vi    = aj + diag[i] + 1;
320 	nz    = ai[i+1] - diag[i] - 1;
321 	s1    = x[i2]; s2 = x[i2+1];
322         for (j=0; j<nz; j++) {
323  	  idx  = 2*vi[j];
324           it   = 4*j;
325 	  x1   = x[idx]; x2 = x[1+idx];
326 	  s1  -= v[it]*x1 + v[it+2]*x2;
327 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
328 	}
329 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
330 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
331         idiag   -= 4;
332         i2      -= 2;
333       }
334       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
335     }
336   } else {
337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
338   }
339   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
340   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
346 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
347 {
348   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
349   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
350   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
351   const PetscScalar  *b;
352   PetscErrorCode     ierr;
353   PetscInt           m = a->mbs,i,i2,nz,idx;
354   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
355 
356   PetscFunctionBegin;
357   its = its*lits;
358   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
359   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
360   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
361   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
362   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
363   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
364 
365   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
366 
367   diag  = a->diag;
368   idiag = a->idiag;
369   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
370   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
371 
372   if (flag & SOR_ZERO_INITIAL_GUESS) {
373     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
374       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
375       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
376       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
377       i2     = 3;
378       idiag += 9;
379       for (i=1; i<m; i++) {
380         v     = aa + 9*ai[i];
381         vi    = aj + ai[i];
382         nz    = diag[i] - ai[i];
383         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
384         while (nz--) {
385           idx  = 3*(*vi++);
386           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
387           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
388           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
389           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
390           v   += 9;
391         }
392         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
393         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
394         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
395         idiag   += 9;
396         i2      += 3;
397       }
398       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
399       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
400     }
401     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
402         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
403       i2    = 0;
404       mdiag = a->idiag+9*a->mbs;
405       for (i=0; i<m; i++) {
406         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
408         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
409         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
410         mdiag  += 9;
411         i2     += 3;
412       }
413       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
414     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
415       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
416     }
417     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
418       idiag   = a->idiag+9*a->mbs - 9;
419       i2      = 3*m - 3;
420       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
421       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
422       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
423       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
424       idiag -= 9;
425       i2    -= 3;
426       for (i=m-2; i>=0; i--) {
427         v     = aa + 9*(diag[i]+1);
428         vi    = aj + diag[i] + 1;
429         nz    = ai[i+1] - diag[i] - 1;
430         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
431         while (nz--) {
432           idx  = 3*(*vi++);
433           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
434           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
435           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
436           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
437           v   += 9;
438         }
439         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
440         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
441         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
442         idiag   -= 9;
443         i2      -= 3;
444       }
445       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
446     }
447   } else {
448     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
449   }
450   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
451   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
457 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
458 {
459   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
460   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
461   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
462   const PetscScalar  *b;
463   PetscErrorCode     ierr;
464   PetscInt           m = a->mbs,i,i2,nz,idx;
465   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
466 
467   PetscFunctionBegin;
468   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
469   its = its*lits;
470   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
471   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
472   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
473   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
474   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
475 
476   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
477 
478   diag  = a->diag;
479   idiag = a->idiag;
480   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
481   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
482 
483   if (flag & SOR_ZERO_INITIAL_GUESS) {
484     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
485       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
486       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
487       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
488       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
489       i2     = 4;
490       idiag += 16;
491       for (i=1; i<m; i++) {
492 	v     = aa + 16*ai[i];
493 	vi    = aj + ai[i];
494 	nz    = diag[i] - ai[i];
495 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
496 	while (nz--) {
497 	  idx  = 4*(*vi++);
498 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
499 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
500 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
501 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
502 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
503 	  v   += 16;
504 	}
505 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
506 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
507 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
508 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
509         idiag   += 16;
510         i2      += 4;
511       }
512       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
513       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
514     }
515     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
516         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
517       i2    = 0;
518       mdiag = a->idiag+16*a->mbs;
519       for (i=0; i<m; i++) {
520         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
521         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
522         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
523         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
524         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
525         mdiag  += 16;
526         i2     += 4;
527       }
528       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
529     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
530       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
531     }
532     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
533       idiag   = a->idiag+16*a->mbs - 16;
534       i2      = 4*m - 4;
535       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
536       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
537       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
538       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
539       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
540       idiag -= 16;
541       i2    -= 4;
542       for (i=m-2; i>=0; i--) {
543 	v     = aa + 16*(diag[i]+1);
544 	vi    = aj + diag[i] + 1;
545 	nz    = ai[i+1] - diag[i] - 1;
546 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
547 	while (nz--) {
548 	  idx  = 4*(*vi++);
549 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
550 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
551 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
552 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
553 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
554 	  v   += 16;
555 	}
556 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
557 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
558 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
559 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
560         idiag   -= 16;
561         i2      -= 4;
562       }
563       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
564     }
565   } else {
566     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
567   }
568   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
575 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
576 {
577   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
578   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
579   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
580   const PetscScalar  *b;
581   PetscErrorCode     ierr;
582   PetscInt           m = a->mbs,i,i2,nz,idx;
583   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
584 
585   PetscFunctionBegin;
586   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
587   its = its*lits;
588   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
589   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
590   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
591   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
592   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
593 
594   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
595 
596   diag  = a->diag;
597   idiag = a->idiag;
598   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
599   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
600 
601   if (flag & SOR_ZERO_INITIAL_GUESS) {
602     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
603       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
604       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
605       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
606       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
607       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
608       i2     = 5;
609       idiag += 25;
610       for (i=1; i<m; i++) {
611 	v     = aa + 25*ai[i];
612 	vi    = aj + ai[i];
613 	nz    = diag[i] - ai[i];
614 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
615 	while (nz--) {
616 	  idx  = 5*(*vi++);
617 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
618 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
619 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
620 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
621 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
622 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
623 	  v   += 25;
624 	}
625 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
626 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
627 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
628 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
629 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
630         idiag   += 25;
631         i2      += 5;
632       }
633       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
634       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
635     }
636     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
637         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
638       i2    = 0;
639       mdiag = a->idiag+25*a->mbs;
640       for (i=0; i<m; i++) {
641         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
642         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
643         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
644         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
645         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
646         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
647         mdiag  += 25;
648         i2     += 5;
649       }
650       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
651     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
652       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
653     }
654     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
655       idiag   = a->idiag+25*a->mbs - 25;
656       i2      = 5*m - 5;
657       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
658       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
659       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
660       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
661       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
662       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
663       idiag -= 25;
664       i2    -= 5;
665       for (i=m-2; i>=0; i--) {
666 	v     = aa + 25*(diag[i]+1);
667 	vi    = aj + diag[i] + 1;
668 	nz    = ai[i+1] - diag[i] - 1;
669 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
670 	while (nz--) {
671 	  idx  = 5*(*vi++);
672 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
673 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
674 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
675 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
676 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
677 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
678 	  v   += 25;
679 	}
680 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
681 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
682 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
683 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
684 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
685         idiag   -= 25;
686         i2      -= 5;
687       }
688       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
689     }
690   } else {
691     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
692   }
693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
694   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
700 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
701 {
702   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
703   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
704   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
705   const PetscScalar  *b;
706   PetscErrorCode     ierr;
707   PetscInt           m = a->mbs,i,i2,nz,idx;
708   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
709 
710   PetscFunctionBegin;
711   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
712   its = its*lits;
713   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
714   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
715   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
716   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
717   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
718 
719   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
720 
721   diag  = a->diag;
722   idiag = a->idiag;
723   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
724   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
725 
726   if (flag & SOR_ZERO_INITIAL_GUESS) {
727     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
728       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
729       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
730       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
731       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
732       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
733       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
734       i2     = 6;
735       idiag += 36;
736       for (i=1; i<m; i++) {
737         v     = aa + 36*ai[i];
738         vi    = aj + ai[i];
739         nz    = diag[i] - ai[i];
740         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
741         while (nz--) {
742           idx  = 6*(*vi++);
743           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
744           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
745           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
746           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
747           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
748           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
749           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
750           v   += 36;
751         }
752         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
753         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
754         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
755         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
756         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
757         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
758         idiag   += 36;
759         i2      += 6;
760       }
761       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
762       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
763     }
764     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
765         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
766       i2    = 0;
767       mdiag = a->idiag+36*a->mbs;
768       for (i=0; i<m; i++) {
769         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
770         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
771         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
772         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
773         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
774         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
775         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
776         mdiag  += 36;
777         i2     += 6;
778       }
779       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
780     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
781       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
782     }
783     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
784       idiag   = a->idiag+36*a->mbs - 36;
785       i2      = 6*m - 6;
786       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
787       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
788       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
789       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
790       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
791       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
792       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
793       idiag -= 36;
794       i2    -= 6;
795       for (i=m-2; i>=0; i--) {
796         v     = aa + 36*(diag[i]+1);
797         vi    = aj + diag[i] + 1;
798         nz    = ai[i+1] - diag[i] - 1;
799         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
800         while (nz--) {
801           idx  = 6*(*vi++);
802           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
803           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
804           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
805           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
806           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
807           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
808           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
809           v   += 36;
810         }
811         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
812         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
813         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
814         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
815         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
816         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
817         idiag   -= 36;
818         i2      -= 6;
819       }
820       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
821     }
822   } else {
823     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
824   }
825   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
826   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
832 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
833 {
834   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
835   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
836   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
837   const PetscScalar  *b;
838   PetscErrorCode     ierr;
839   PetscInt           m = a->mbs,i,i2,nz,idx;
840   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
841 
842   PetscFunctionBegin;
843   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
844   its = its*lits;
845   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
846   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
847   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
848   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
849   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
850 
851   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
852 
853   diag  = a->diag;
854   idiag = a->idiag;
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
857 
858   if (flag & SOR_ZERO_INITIAL_GUESS) {
859     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
860       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
861       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
862       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
863       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
864       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
865       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
866       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
867       i2     = 7;
868       idiag += 49;
869       for (i=1; i<m; i++) {
870         v     = aa + 49*ai[i];
871         vi    = aj + ai[i];
872         nz    = diag[i] - ai[i];
873         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
874         while (nz--) {
875           idx  = 7*(*vi++);
876           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
877           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
878           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
879           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
880           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
881           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
882           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
883           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
884           v   += 49;
885         }
886         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
887         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
888         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
889         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
890         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
891         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
892         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
893         idiag   += 49;
894         i2      += 7;
895       }
896       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
897       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
898     }
899     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
900         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
901       i2    = 0;
902       mdiag = a->idiag+49*a->mbs;
903       for (i=0; i<m; i++) {
904         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
905         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
906         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
907         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
908         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
909         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
910         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
911         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
912         mdiag  += 49;
913         i2     += 7;
914       }
915       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
916     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
917       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
918     }
919     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
920       idiag   = a->idiag+49*a->mbs - 49;
921       i2      = 7*m - 7;
922       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
923       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
924       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
925       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
926       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
927       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
928       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
929       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
930       idiag -= 49;
931       i2    -= 7;
932       for (i=m-2; i>=0; i--) {
933         v     = aa + 49*(diag[i]+1);
934         vi    = aj + diag[i] + 1;
935         nz    = ai[i+1] - diag[i] - 1;
936         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
937         while (nz--) {
938           idx  = 7*(*vi++);
939           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
940           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
941           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
942           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
943           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
944           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
945           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
946           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
947           v   += 49;
948         }
949         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
950         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
951         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
952         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
953         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
954         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
955         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
956         idiag   -= 49;
957         i2      -= 7;
958       }
959       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
960     }
961   } else {
962     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
963   }
964   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
971 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
972 {
973   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
974   PetscScalar        *x,*work,*w,*workt;
975   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
976   const PetscScalar  *b;
977   PetscErrorCode     ierr;
978   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
979   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
980 
981   PetscFunctionBegin;
982   its = its*lits;
983   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
984   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
985   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
986   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
987   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
988   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
989 
990   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
991 
992   diag  = a->diag;
993   idiag = a->idiag;
994   if (!a->mult_work) {
995     k    = PetscMax(A->rmap->n,A->cmap->n);
996     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
997   }
998   work = a->mult_work;
999   if (!a->sor_work) {
1000     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1001   }
1002   w = a->sor_work;
1003 
1004   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1006 
1007   if (flag & SOR_ZERO_INITIAL_GUESS) {
1008     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1009       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1010       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1011       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1012       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1013       i2     = bs;
1014       idiag += bs2;
1015       for (i=1; i<m; i++) {
1016         v     = aa + bs2*ai[i];
1017         vi    = aj + ai[i];
1018         nz    = diag[i] - ai[i];
1019 
1020         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1021 	/* copy all rows of x that are needed into contiguous space */
1022         workt = work;
1023         for (j=0; j<nz; j++) {
1024           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1025           workt += bs;
1026         }
1027         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1028        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1029         while (nz--) {
1030           idx  = N*(*vi++);
1031           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1032           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1033           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1034           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1035           v   += N2;
1036 	  } */
1037 
1038         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1039         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1040         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1041         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1042 
1043         idiag   += bs2;
1044         i2      += bs;
1045       }
1046       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1047       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1048     }
1049     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1050         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1051       i2    = 0;
1052       mdiag = a->idiag+bs2*a->mbs;
1053       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1054       for (i=0; i<m; i++) {
1055         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1056         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1057         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1058         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1059         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1060 
1061         mdiag  += bs2;
1062         i2     += bs;
1063       }
1064       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1065     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1066       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1067     }
1068     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1069       idiag   = a->idiag+bs2*a->mbs - bs2;
1070       i2      = bs*m - bs;
1071       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1072       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1073       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1074       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1075       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1076       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1077       idiag -= bs2;
1078       i2    -= bs;
1079       for (i=m-2; i>=0; i--) {
1080         v     = aa + bs2*(diag[i]+1);
1081         vi    = aj + diag[i] + 1;
1082         nz    = ai[i+1] - diag[i] - 1;
1083 
1084         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1085 	/* copy all rows of x that are needed into contiguous space */
1086         workt = work;
1087         for (j=0; j<nz; j++) {
1088           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1089           workt += bs;
1090         }
1091         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1092         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1093         while (nz--) {
1094           idx  = N*(*vi++);
1095           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1096           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1097           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1098           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1099           v   += N2;
1100 	  } */
1101 
1102         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1103         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1104         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1105         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1106         idiag   -= bs2;
1107         i2      -= bs;
1108       }
1109       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1110     }
1111   } else {
1112     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1113   }
1114   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1115   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1121 */
1122 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1123 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1125 #define matsetvaluesblocked4_ matsetvaluesblocked4
1126 #endif
1127 
1128 EXTERN_C_BEGIN
1129 #undef __FUNCT__
1130 #define __FUNCT__ "matsetvaluesblocked4_"
1131 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1132 {
1133   Mat               A = *AA;
1134   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1135   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1136   PetscInt          *ai=a->i,*ailen=a->ilen;
1137   PetscInt          *aj=a->j,stepval,lastcol = -1;
1138   const PetscScalar *value = v;
1139   MatScalar         *ap,*aa = a->a,*bap;
1140 
1141   PetscFunctionBegin;
1142   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1143   stepval = (n-1)*4;
1144   for (k=0; k<m; k++) { /* loop over added rows */
1145     row  = im[k];
1146     rp   = aj + ai[row];
1147     ap   = aa + 16*ai[row];
1148     nrow = ailen[row];
1149     low  = 0;
1150     high = nrow;
1151     for (l=0; l<n; l++) { /* loop over added columns */
1152       col = in[l];
1153       if (col <= lastcol) low = 0; else high = nrow;
1154       lastcol = col;
1155       value = v + k*(stepval+4 + l)*4;
1156       while (high-low > 7) {
1157         t = (low+high)/2;
1158         if (rp[t] > col) high = t;
1159         else             low  = t;
1160       }
1161       for (i=low; i<high; i++) {
1162         if (rp[i] > col) break;
1163         if (rp[i] == col) {
1164           bap  = ap +  16*i;
1165           for (ii=0; ii<4; ii++,value+=stepval) {
1166             for (jj=ii; jj<16; jj+=4) {
1167               bap[jj] += *value++;
1168             }
1169           }
1170           goto noinsert2;
1171         }
1172       }
1173       N = nrow++ - 1;
1174       high++; /* added new column index thus must search to one higher than before */
1175       /* shift up all the later entries in this row */
1176       for (ii=N; ii>=i; ii--) {
1177         rp[ii+1] = rp[ii];
1178         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1179       }
1180       if (N >= i) {
1181         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1182       }
1183       rp[i] = col;
1184       bap   = ap +  16*i;
1185       for (ii=0; ii<4; ii++,value+=stepval) {
1186         for (jj=ii; jj<16; jj+=4) {
1187           bap[jj] = *value++;
1188         }
1189       }
1190       noinsert2:;
1191       low = i;
1192     }
1193     ailen[row] = nrow;
1194   }
1195   PetscFunctionReturnVoid();
1196 }
1197 EXTERN_C_END
1198 
1199 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1200 #define matsetvalues4_ MATSETVALUES4
1201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1202 #define matsetvalues4_ matsetvalues4
1203 #endif
1204 
1205 EXTERN_C_BEGIN
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatSetValues4_"
1208 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1209 {
1210   Mat         A = *AA;
1211   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1212   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1213   PetscInt    *ai=a->i,*ailen=a->ilen;
1214   PetscInt    *aj=a->j,brow,bcol;
1215   PetscInt    ridx,cidx,lastcol = -1;
1216   MatScalar   *ap,value,*aa=a->a,*bap;
1217 
1218   PetscFunctionBegin;
1219   for (k=0; k<m; k++) { /* loop over added rows */
1220     row  = im[k]; brow = row/4;
1221     rp   = aj + ai[brow];
1222     ap   = aa + 16*ai[brow];
1223     nrow = ailen[brow];
1224     low  = 0;
1225     high = nrow;
1226     for (l=0; l<n; l++) { /* loop over added columns */
1227       col = in[l]; bcol = col/4;
1228       ridx = row % 4; cidx = col % 4;
1229       value = v[l + k*n];
1230       if (col <= lastcol) low = 0; else high = nrow;
1231       lastcol = col;
1232       while (high-low > 7) {
1233         t = (low+high)/2;
1234         if (rp[t] > bcol) high = t;
1235         else              low  = t;
1236       }
1237       for (i=low; i<high; i++) {
1238         if (rp[i] > bcol) break;
1239         if (rp[i] == bcol) {
1240           bap  = ap +  16*i + 4*cidx + ridx;
1241           *bap += value;
1242           goto noinsert1;
1243         }
1244       }
1245       N = nrow++ - 1;
1246       high++; /* added new column thus must search to one higher than before */
1247       /* shift up all the later entries in this row */
1248       for (ii=N; ii>=i; ii--) {
1249         rp[ii+1] = rp[ii];
1250         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1251       }
1252       if (N>=i) {
1253         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1254       }
1255       rp[i]                    = bcol;
1256       ap[16*i + 4*cidx + ridx] = value;
1257       noinsert1:;
1258       low = i;
1259     }
1260     ailen[brow] = nrow;
1261   }
1262   PetscFunctionReturnVoid();
1263 }
1264 EXTERN_C_END
1265 
1266 /*
1267      Checks for missing diagonals
1268 */
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1271 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1272 {
1273   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1274   PetscErrorCode ierr;
1275   PetscInt       *diag,*jj = a->j,i;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1279   *missing = PETSC_FALSE;
1280   if (A->rmap->n > 0 && !jj) {
1281     *missing  = PETSC_TRUE;
1282     if (d) *d = 0;
1283     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1284   } else {
1285     diag     = a->diag;
1286     for (i=0; i<a->mbs; i++) {
1287       if (jj[diag[i]] != i) {
1288         *missing  = PETSC_TRUE;
1289         if (d) *d = i;
1290         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1291       }
1292     }
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1299 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1300 {
1301   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1302   PetscErrorCode ierr;
1303   PetscInt       i,j,m = a->mbs;
1304 
1305   PetscFunctionBegin;
1306   if (!a->diag) {
1307     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1308     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1309     a->free_diag = PETSC_TRUE;
1310   }
1311   for (i=0; i<m; i++) {
1312     a->diag[i] = a->i[i+1];
1313     for (j=a->i[i]; j<a->i[i+1]; j++) {
1314       if (a->j[j] == i) {
1315         a->diag[i] = j;
1316         break;
1317       }
1318     }
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 
1324 extern PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1328 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1333   PetscInt       *tia, *tja;
1334 
1335   PetscFunctionBegin;
1336   *nn = n;
1337   if (!ia) PetscFunctionReturn(0);
1338   if (symmetric) {
1339     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1340     nz   = tia[n];
1341   } else {
1342     tia = a->i; tja = a->j;
1343   }
1344 
1345   if (!blockcompressed && bs > 1) {
1346     (*nn) *= bs;
1347     /* malloc & create the natural set of indices */
1348     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1349     if (n) {
1350       (*ia)[0] = 0;
1351       for (j=1; j<bs; j++) {
1352         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1353       }
1354     }
1355 
1356     for (i=1; i<n; i++) {
1357       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1358       for (j=1; j<bs; j++) {
1359         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1360       }
1361     }
1362     if (n) {
1363       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1364     }
1365 
1366     if (ja) {
1367       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1368       cnt = 0;
1369       for (i=0; i<n; i++) {
1370         for (j=0; j<bs; j++) {
1371           for (k=tia[i]; k<tia[i+1]; k++) {
1372             for (l=0; l<bs; l++) {
1373               (*ja)[cnt++] = bs*tja[k] + l;
1374 	    }
1375           }
1376         }
1377       }
1378     }
1379 
1380     n     *= bs;
1381     nz *= bs*bs;
1382     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1383       ierr = PetscFree(tia);CHKERRQ(ierr);
1384       ierr = PetscFree(tja);CHKERRQ(ierr);
1385     }
1386   } else if (oshift == 1) {
1387     if (symmetric) {
1388       PetscInt nz = tia[A->rmap->n/bs];
1389       /*  add 1 to i and j indices */
1390       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1391       *ia = tia;
1392       if (ja) {
1393 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1394         *ja = tja;
1395       }
1396     } else {
1397       PetscInt nz = a->i[A->rmap->n/bs];
1398       /* malloc space and  add 1 to i and j indices */
1399       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1400       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1401       if (ja) {
1402 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1403 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1404       }
1405     }
1406   } else {
1407     *ia = tia;
1408     if (ja) *ja = tja;
1409   }
1410 
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1416 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1417 {
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (!ia) PetscFunctionReturn(0);
1422   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1423     ierr = PetscFree(*ia);CHKERRQ(ierr);
1424     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1431 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1432 {
1433   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437 #if defined(PETSC_USE_LOG)
1438   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1439 #endif
1440   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1441   if (a->row) {
1442     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1443   }
1444   if (a->col) {
1445     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1446   }
1447   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1448   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1449   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1450   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1451   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1452   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1453   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1455   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1456   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1457 
1458   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1459   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1460   ierr = PetscFree(a);CHKERRQ(ierr);
1461 
1462   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1468   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1476 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1477 {
1478   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   switch (op) {
1483   case MAT_ROW_ORIENTED:
1484     a->roworiented    = flg;
1485     break;
1486   case MAT_KEEP_NONZERO_PATTERN:
1487     a->keepnonzeropattern = flg;
1488     break;
1489   case MAT_NEW_NONZERO_LOCATIONS:
1490     a->nonew          = (flg ? 0 : 1);
1491     break;
1492   case MAT_NEW_NONZERO_LOCATION_ERR:
1493     a->nonew          = (flg ? -1 : 0);
1494     break;
1495   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1496     a->nonew          = (flg ? -2 : 0);
1497     break;
1498   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1499     a->nounused       = (flg ? -1 : 0);
1500     break;
1501   case MAT_CHECK_COMPRESSED_ROW:
1502     a->compressedrow.check = flg;
1503     break;
1504   case MAT_NEW_DIAGONALS:
1505   case MAT_IGNORE_OFF_PROC_ENTRIES:
1506   case MAT_USE_HASH_TABLE:
1507     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1508     break;
1509   case MAT_SYMMETRIC:
1510   case MAT_STRUCTURALLY_SYMMETRIC:
1511   case MAT_HERMITIAN:
1512   case MAT_SYMMETRY_ETERNAL:
1513     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1514     break;
1515   default:
1516     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1523 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1524 {
1525   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1526   PetscErrorCode ierr;
1527   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1528   MatScalar      *aa,*aa_i;
1529   PetscScalar    *v_i;
1530 
1531   PetscFunctionBegin;
1532   bs  = A->rmap->bs;
1533   ai  = a->i;
1534   aj  = a->j;
1535   aa  = a->a;
1536   bs2 = a->bs2;
1537 
1538   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1539 
1540   bn  = row/bs;   /* Block number */
1541   bp  = row % bs; /* Block Position */
1542   M   = ai[bn+1] - ai[bn];
1543   *nz = bs*M;
1544 
1545   if (v) {
1546     *v = 0;
1547     if (*nz) {
1548       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1549       for (i=0; i<M; i++) { /* for each block in the block row */
1550         v_i  = *v + i*bs;
1551         aa_i = aa + bs2*(ai[bn] + i);
1552         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1553       }
1554     }
1555   }
1556 
1557   if (idx) {
1558     *idx = 0;
1559     if (*nz) {
1560       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1561       for (i=0; i<M; i++) { /* for each block in the block row */
1562         idx_i = *idx + i*bs;
1563         itmp  = bs*aj[ai[bn] + i];
1564         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1565       }
1566     }
1567   }
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1573 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1574 {
1575   PetscErrorCode ierr;
1576 
1577   PetscFunctionBegin;
1578   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1579   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1584 
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1587 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1588 {
1589   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1590   Mat            C;
1591   PetscErrorCode ierr;
1592   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1593   PetscInt       *rows,*cols,bs2=a->bs2;
1594   MatScalar      *array;
1595 
1596   PetscFunctionBegin;
1597   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1598   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1599     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1600     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1601 
1602     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1603     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1604     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1605     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1606     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1607     ierr = PetscFree(col);CHKERRQ(ierr);
1608   } else {
1609     C = *B;
1610   }
1611 
1612   array = a->a;
1613   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1614   for (i=0; i<mbs; i++) {
1615     cols[0] = i*bs;
1616     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1617     len = ai[i+1] - ai[i];
1618     for (j=0; j<len; j++) {
1619       rows[0] = (*aj++)*bs;
1620       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1621       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1622       array += bs2;
1623     }
1624   }
1625   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1626 
1627   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1628   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1629 
1630   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1631     *B = C;
1632   } else {
1633     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1634   }
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1640 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1641 {
1642   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1643   PetscErrorCode ierr;
1644   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1645   int            fd;
1646   PetscScalar    *aa;
1647   FILE           *file;
1648 
1649   PetscFunctionBegin;
1650   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1651   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1652   col_lens[0] = MAT_FILE_CLASSID;
1653 
1654   col_lens[1] = A->rmap->N;
1655   col_lens[2] = A->cmap->n;
1656   col_lens[3] = a->nz*bs2;
1657 
1658   /* store lengths of each row and write (including header) to file */
1659   count = 0;
1660   for (i=0; i<a->mbs; i++) {
1661     for (j=0; j<bs; j++) {
1662       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1663     }
1664   }
1665   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1666   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1667 
1668   /* store column indices (zero start index) */
1669   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1670   count = 0;
1671   for (i=0; i<a->mbs; i++) {
1672     for (j=0; j<bs; j++) {
1673       for (k=a->i[i]; k<a->i[i+1]; k++) {
1674         for (l=0; l<bs; l++) {
1675           jj[count++] = bs*a->j[k] + l;
1676         }
1677       }
1678     }
1679   }
1680   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1681   ierr = PetscFree(jj);CHKERRQ(ierr);
1682 
1683   /* store nonzero values */
1684   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1685   count = 0;
1686   for (i=0; i<a->mbs; i++) {
1687     for (j=0; j<bs; j++) {
1688       for (k=a->i[i]; k<a->i[i+1]; k++) {
1689         for (l=0; l<bs; l++) {
1690           aa[count++] = a->a[bs2*k + l*bs + j];
1691         }
1692       }
1693     }
1694   }
1695   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1696   ierr = PetscFree(aa);CHKERRQ(ierr);
1697 
1698   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1699   if (file) {
1700     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1701   }
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 #undef __FUNCT__
1706 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1707 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1708 {
1709   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1710   PetscErrorCode    ierr;
1711   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1712   PetscViewerFormat format;
1713 
1714   PetscFunctionBegin;
1715   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1716   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1717     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1718   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1719     Mat aij;
1720     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1721     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1722     ierr = MatDestroy(aij);CHKERRQ(ierr);
1723   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1724      PetscFunctionReturn(0);
1725   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1726     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1727     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1728     for (i=0; i<a->mbs; i++) {
1729       for (j=0; j<bs; j++) {
1730         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1731         for (k=a->i[i]; k<a->i[i+1]; k++) {
1732           for (l=0; l<bs; l++) {
1733 #if defined(PETSC_USE_COMPLEX)
1734             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1736                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1738               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1739                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1740             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1741               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1742             }
1743 #else
1744             if (a->a[bs2*k + l*bs + j] != 0.0) {
1745               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1746             }
1747 #endif
1748           }
1749         }
1750         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1751       }
1752     }
1753     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1754   } else {
1755     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1756     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1757     for (i=0; i<a->mbs; i++) {
1758       for (j=0; j<bs; j++) {
1759         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1760         for (k=a->i[i]; k<a->i[i+1]; k++) {
1761           for (l=0; l<bs; l++) {
1762 #if defined(PETSC_USE_COMPLEX)
1763             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1765                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1766             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1767               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1768                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1769             } else {
1770               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1771             }
1772 #else
1773             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1774 #endif
1775           }
1776         }
1777         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1778       }
1779     }
1780     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1781   }
1782   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1788 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1789 {
1790   Mat            A = (Mat) Aa;
1791   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1792   PetscErrorCode ierr;
1793   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1794   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1795   MatScalar      *aa;
1796   PetscViewer    viewer;
1797 
1798   PetscFunctionBegin;
1799 
1800   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1801   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1802 
1803   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1804 
1805   /* loop over matrix elements drawing boxes */
1806   color = PETSC_DRAW_BLUE;
1807   for (i=0,row=0; i<mbs; i++,row+=bs) {
1808     for (j=a->i[i]; j<a->i[i+1]; j++) {
1809       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1810       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1811       aa = a->a + j*bs2;
1812       for (k=0; k<bs; k++) {
1813         for (l=0; l<bs; l++) {
1814           if (PetscRealPart(*aa++) >=  0.) continue;
1815           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1816         }
1817       }
1818     }
1819   }
1820   color = PETSC_DRAW_CYAN;
1821   for (i=0,row=0; i<mbs; i++,row+=bs) {
1822     for (j=a->i[i]; j<a->i[i+1]; j++) {
1823       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1824       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1825       aa = a->a + j*bs2;
1826       for (k=0; k<bs; k++) {
1827         for (l=0; l<bs; l++) {
1828           if (PetscRealPart(*aa++) != 0.) continue;
1829           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1830         }
1831       }
1832     }
1833   }
1834 
1835   color = PETSC_DRAW_RED;
1836   for (i=0,row=0; i<mbs; i++,row+=bs) {
1837     for (j=a->i[i]; j<a->i[i+1]; j++) {
1838       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1839       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1840       aa = a->a + j*bs2;
1841       for (k=0; k<bs; k++) {
1842         for (l=0; l<bs; l++) {
1843           if (PetscRealPart(*aa++) <= 0.) continue;
1844           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1845         }
1846       }
1847     }
1848   }
1849   PetscFunctionReturn(0);
1850 }
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1854 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1855 {
1856   PetscErrorCode ierr;
1857   PetscReal      xl,yl,xr,yr,w,h;
1858   PetscDraw      draw;
1859   PetscBool      isnull;
1860 
1861   PetscFunctionBegin;
1862 
1863   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1864   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1865 
1866   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1867   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1868   xr += w;    yr += h;  xl = -w;     yl = -h;
1869   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1870   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1871   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1872   PetscFunctionReturn(0);
1873 }
1874 
1875 #undef __FUNCT__
1876 #define __FUNCT__ "MatView_SeqBAIJ"
1877 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1878 {
1879   PetscErrorCode ierr;
1880   PetscBool      iascii,isbinary,isdraw;
1881 
1882   PetscFunctionBegin;
1883   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1884   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1885   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1886   if (iascii){
1887     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1888   } else if (isbinary) {
1889     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1890   } else if (isdraw) {
1891     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1892   } else {
1893     Mat B;
1894     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1895     ierr = MatView(B,viewer);CHKERRQ(ierr);
1896     ierr = MatDestroy(B);CHKERRQ(ierr);
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1904 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1905 {
1906   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1907   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1908   PetscInt    *ai = a->i,*ailen = a->ilen;
1909   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1910   MatScalar   *ap,*aa = a->a;
1911 
1912   PetscFunctionBegin;
1913   for (k=0; k<m; k++) { /* loop over rows */
1914     row  = im[k]; brow = row/bs;
1915     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1916     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1917     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1918     nrow = ailen[brow];
1919     for (l=0; l<n; l++) { /* loop over columns */
1920       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1921       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1922       col  = in[l] ;
1923       bcol = col/bs;
1924       cidx = col%bs;
1925       ridx = row%bs;
1926       high = nrow;
1927       low  = 0; /* assume unsorted */
1928       while (high-low > 5) {
1929         t = (low+high)/2;
1930         if (rp[t] > bcol) high = t;
1931         else             low  = t;
1932       }
1933       for (i=low; i<high; i++) {
1934         if (rp[i] > bcol) break;
1935         if (rp[i] == bcol) {
1936           *v++ = ap[bs2*i+bs*cidx+ridx];
1937           goto finished;
1938         }
1939       }
1940       *v++ = 0.0;
1941       finished:;
1942     }
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1949 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1950 {
1951   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1952   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1953   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1954   PetscErrorCode    ierr;
1955   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1956   PetscBool         roworiented=a->roworiented;
1957   const PetscScalar *value = v;
1958   MatScalar         *ap,*aa = a->a,*bap;
1959 
1960   PetscFunctionBegin;
1961   if (roworiented) {
1962     stepval = (n-1)*bs;
1963   } else {
1964     stepval = (m-1)*bs;
1965   }
1966   for (k=0; k<m; k++) { /* loop over added rows */
1967     row  = im[k];
1968     if (row < 0) continue;
1969 #if defined(PETSC_USE_DEBUG)
1970     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1971 #endif
1972     rp   = aj + ai[row];
1973     ap   = aa + bs2*ai[row];
1974     rmax = imax[row];
1975     nrow = ailen[row];
1976     low  = 0;
1977     high = nrow;
1978     for (l=0; l<n; l++) { /* loop over added columns */
1979       if (in[l] < 0) continue;
1980 #if defined(PETSC_USE_DEBUG)
1981       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1982 #endif
1983       col = in[l];
1984       if (roworiented) {
1985         value = v + (k*(stepval+bs) + l)*bs;
1986       } else {
1987         value = v + (l*(stepval+bs) + k)*bs;
1988       }
1989       if (col <= lastcol) low = 0; else high = nrow;
1990       lastcol = col;
1991       while (high-low > 7) {
1992         t = (low+high)/2;
1993         if (rp[t] > col) high = t;
1994         else             low  = t;
1995       }
1996       for (i=low; i<high; i++) {
1997         if (rp[i] > col) break;
1998         if (rp[i] == col) {
1999           bap  = ap +  bs2*i;
2000           if (roworiented) {
2001             if (is == ADD_VALUES) {
2002               for (ii=0; ii<bs; ii++,value+=stepval) {
2003                 for (jj=ii; jj<bs2; jj+=bs) {
2004                   bap[jj] += *value++;
2005                 }
2006               }
2007             } else {
2008               for (ii=0; ii<bs; ii++,value+=stepval) {
2009                 for (jj=ii; jj<bs2; jj+=bs) {
2010                   bap[jj] = *value++;
2011                 }
2012               }
2013             }
2014           } else {
2015             if (is == ADD_VALUES) {
2016               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2017                 for (jj=0; jj<bs; jj++) {
2018                   bap[jj] += value[jj];
2019                 }
2020                 bap += bs;
2021               }
2022             } else {
2023               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2024                 for (jj=0; jj<bs; jj++) {
2025                   bap[jj]  = value[jj];
2026                 }
2027                 bap += bs;
2028               }
2029             }
2030           }
2031           goto noinsert2;
2032         }
2033       }
2034       if (nonew == 1) goto noinsert2;
2035       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2036       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2037       N = nrow++ - 1; high++;
2038       /* shift up all the later entries in this row */
2039       for (ii=N; ii>=i; ii--) {
2040         rp[ii+1] = rp[ii];
2041         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2042       }
2043       if (N >= i) {
2044         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2045       }
2046       rp[i] = col;
2047       bap   = ap +  bs2*i;
2048       if (roworiented) {
2049         for (ii=0; ii<bs; ii++,value+=stepval) {
2050           for (jj=ii; jj<bs2; jj+=bs) {
2051             bap[jj] = *value++;
2052           }
2053         }
2054       } else {
2055         for (ii=0; ii<bs; ii++,value+=stepval) {
2056           for (jj=0; jj<bs; jj++) {
2057             *bap++  = *value++;
2058           }
2059         }
2060       }
2061       noinsert2:;
2062       low = i;
2063     }
2064     ailen[row] = nrow;
2065   }
2066   PetscFunctionReturn(0);
2067 }
2068 
2069 #undef __FUNCT__
2070 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2071 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2072 {
2073   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2074   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2075   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2076   PetscErrorCode ierr;
2077   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2078   MatScalar      *aa = a->a,*ap;
2079   PetscReal      ratio=0.6;
2080 
2081   PetscFunctionBegin;
2082   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2083 
2084   if (m) rmax = ailen[0];
2085   for (i=1; i<mbs; i++) {
2086     /* move each row back by the amount of empty slots (fshift) before it*/
2087     fshift += imax[i-1] - ailen[i-1];
2088     rmax   = PetscMax(rmax,ailen[i]);
2089     if (fshift) {
2090       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2091       N = ailen[i];
2092       for (j=0; j<N; j++) {
2093         ip[j-fshift] = ip[j];
2094         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2095       }
2096     }
2097     ai[i] = ai[i-1] + ailen[i-1];
2098   }
2099   if (mbs) {
2100     fshift += imax[mbs-1] - ailen[mbs-1];
2101     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2102   }
2103   /* reset ilen and imax for each row */
2104   for (i=0; i<mbs; i++) {
2105     ailen[i] = imax[i] = ai[i+1] - ai[i];
2106   }
2107   a->nz = ai[mbs];
2108 
2109   /* diagonals may have moved, so kill the diagonal pointers */
2110   a->idiagvalid = PETSC_FALSE;
2111   if (fshift && a->diag) {
2112     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2113     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2114     a->diag = 0;
2115   }
2116   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2117   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2118   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2119   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2120   A->info.mallocs     += a->reallocs;
2121   a->reallocs          = 0;
2122   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2123 
2124   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2125   A->same_nonzero = PETSC_TRUE;
2126   PetscFunctionReturn(0);
2127 }
2128 
2129 /*
2130    This function returns an array of flags which indicate the locations of contiguous
2131    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2132    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2133    Assume: sizes should be long enough to hold all the values.
2134 */
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2137 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2138 {
2139   PetscInt   i,j,k,row;
2140   PetscBool  flg;
2141 
2142   PetscFunctionBegin;
2143   for (i=0,j=0; i<n; j++) {
2144     row = idx[i];
2145     if (row%bs!=0) { /* Not the begining of a block */
2146       sizes[j] = 1;
2147       i++;
2148     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2149       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2150       i++;
2151     } else { /* Begining of the block, so check if the complete block exists */
2152       flg = PETSC_TRUE;
2153       for (k=1; k<bs; k++) {
2154         if (row+k != idx[i+k]) { /* break in the block */
2155           flg = PETSC_FALSE;
2156           break;
2157         }
2158       }
2159       if (flg) { /* No break in the bs */
2160         sizes[j] = bs;
2161         i+= bs;
2162       } else {
2163         sizes[j] = 1;
2164         i++;
2165       }
2166     }
2167   }
2168   *bs_max = j;
2169   PetscFunctionReturn(0);
2170 }
2171 
2172 #undef __FUNCT__
2173 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2174 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2175 {
2176   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2177   PetscErrorCode    ierr;
2178   PetscInt          i,j,k,count,*rows;
2179   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2180   PetscScalar       zero = 0.0;
2181   MatScalar         *aa;
2182   const PetscScalar *xx;
2183   PetscScalar       *bb;
2184 
2185   PetscFunctionBegin;
2186   /* fix right hand side if needed */
2187   if (x && b) {
2188     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2189     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2190     for (i=0; i<is_n; i++) {
2191       bb[is_idx[i]] = diag*xx[is_idx[i]];
2192     }
2193     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2194     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2195   }
2196 
2197   /* Make a copy of the IS and  sort it */
2198   /* allocate memory for rows,sizes */
2199   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2200 
2201   /* copy IS values to rows, and sort them */
2202   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2203   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2204 
2205   if (baij->keepnonzeropattern) {
2206     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2207     bs_max = is_n;
2208     A->same_nonzero = PETSC_TRUE;
2209   } else {
2210     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2211     A->same_nonzero = PETSC_FALSE;
2212   }
2213 
2214   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2215     row   = rows[j];
2216     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2217     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2218     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2219     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2220       if (diag != 0.0) {
2221         if (baij->ilen[row/bs] > 0) {
2222           baij->ilen[row/bs]       = 1;
2223           baij->j[baij->i[row/bs]] = row/bs;
2224           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2225         }
2226         /* Now insert all the diagonal values for this bs */
2227         for (k=0; k<bs; k++) {
2228           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2229         }
2230       } else { /* (diag == 0.0) */
2231         baij->ilen[row/bs] = 0;
2232       } /* end (diag == 0.0) */
2233     } else { /* (sizes[i] != bs) */
2234 #if defined (PETSC_USE_DEBUG)
2235       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2236 #endif
2237       for (k=0; k<count; k++) {
2238         aa[0] =  zero;
2239         aa    += bs;
2240       }
2241       if (diag != 0.0) {
2242         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2243       }
2244     }
2245   }
2246 
2247   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2248   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2249   PetscFunctionReturn(0);
2250 }
2251 
2252 #undef __FUNCT__
2253 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2254 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2255 {
2256   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2257   PetscErrorCode    ierr;
2258   PetscInt          i,j,k,count;
2259   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2260   PetscScalar       zero = 0.0;
2261   MatScalar         *aa;
2262   const PetscScalar *xx;
2263   PetscScalar       *bb;
2264   PetscBool         *zeroed,vecs = PETSC_FALSE;
2265 
2266   PetscFunctionBegin;
2267   /* fix right hand side if needed */
2268   if (x && b) {
2269     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2270     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2271     vecs = PETSC_TRUE;
2272   }
2273   A->same_nonzero = PETSC_TRUE;
2274 
2275   /* zero the columns */
2276   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2277   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2278   for (i=0; i<is_n; i++) {
2279     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2280     zeroed[is_idx[i]] = PETSC_TRUE;
2281   }
2282   for (i=0; i<A->rmap->N; i++) {
2283     if (!zeroed[i]) {
2284       row = i/bs;
2285       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2286         for (k=0; k<bs; k++) {
2287           col = bs*baij->j[j] + k;
2288 	  if (zeroed[col]) {
2289 	    aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2290             if (vecs) bb[i] -= aa[0]*xx[col];
2291             aa[0] = 0.0;
2292           }
2293         }
2294       }
2295     } else if (vecs) bb[i] = diag*xx[i];
2296   }
2297   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2298   if (vecs) {
2299     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2300     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2301   }
2302 
2303   /* zero the rows */
2304   for (i=0; i<is_n; i++) {
2305     row   = is_idx[i];
2306     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2307     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2308     for (k=0; k<count; k++) {
2309       aa[0] =  zero;
2310       aa    += bs;
2311     }
2312     if (diag != 0.0) {
2313       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2314     }
2315   }
2316   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2317   PetscFunctionReturn(0);
2318 }
2319 
2320 #undef __FUNCT__
2321 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2322 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2323 {
2324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2325   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2326   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2327   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2328   PetscErrorCode ierr;
2329   PetscInt       ridx,cidx,bs2=a->bs2;
2330   PetscBool      roworiented=a->roworiented;
2331   MatScalar      *ap,value,*aa=a->a,*bap;
2332 
2333   PetscFunctionBegin;
2334   if (v) PetscValidScalarPointer(v,6);
2335   for (k=0; k<m; k++) { /* loop over added rows */
2336     row  = im[k];
2337     brow = row/bs;
2338     if (row < 0) continue;
2339 #if defined(PETSC_USE_DEBUG)
2340     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2341 #endif
2342     rp   = aj + ai[brow];
2343     ap   = aa + bs2*ai[brow];
2344     rmax = imax[brow];
2345     nrow = ailen[brow];
2346     low  = 0;
2347     high = nrow;
2348     for (l=0; l<n; l++) { /* loop over added columns */
2349       if (in[l] < 0) continue;
2350 #if defined(PETSC_USE_DEBUG)
2351       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2352 #endif
2353       col = in[l]; bcol = col/bs;
2354       ridx = row % bs; cidx = col % bs;
2355       if (roworiented) {
2356         value = v[l + k*n];
2357       } else {
2358         value = v[k + l*m];
2359       }
2360       if (col <= lastcol) low = 0; else high = nrow;
2361       lastcol = col;
2362       while (high-low > 7) {
2363         t = (low+high)/2;
2364         if (rp[t] > bcol) high = t;
2365         else              low  = t;
2366       }
2367       for (i=low; i<high; i++) {
2368         if (rp[i] > bcol) break;
2369         if (rp[i] == bcol) {
2370           bap  = ap +  bs2*i + bs*cidx + ridx;
2371           if (is == ADD_VALUES) *bap += value;
2372           else                  *bap  = value;
2373           goto noinsert1;
2374         }
2375       }
2376       if (nonew == 1) goto noinsert1;
2377       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2378       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2379       N = nrow++ - 1; high++;
2380       /* shift up all the later entries in this row */
2381       for (ii=N; ii>=i; ii--) {
2382         rp[ii+1] = rp[ii];
2383         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2384       }
2385       if (N>=i) {
2386         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2387       }
2388       rp[i]                      = bcol;
2389       ap[bs2*i + bs*cidx + ridx] = value;
2390       a->nz++;
2391       noinsert1:;
2392       low = i;
2393     }
2394     ailen[brow] = nrow;
2395   }
2396   A->same_nonzero = PETSC_FALSE;
2397   PetscFunctionReturn(0);
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2402 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2403 {
2404   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2405   Mat            outA;
2406   PetscErrorCode ierr;
2407   PetscBool      row_identity,col_identity;
2408 
2409   PetscFunctionBegin;
2410   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2411   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2412   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2413   if (!row_identity || !col_identity) {
2414     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2415   }
2416 
2417   outA            = inA;
2418   inA->factortype = MAT_FACTOR_LU;
2419 
2420   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2421 
2422   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2423   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2424   a->row = row;
2425   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2426   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2427   a->col = col;
2428 
2429   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2430   if (a->icol) {
2431     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2432   }
2433   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2434   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2435 
2436   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2437   if (!a->solve_work) {
2438     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2439     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2440   }
2441   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2442 
2443   PetscFunctionReturn(0);
2444 }
2445 
2446 EXTERN_C_BEGIN
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2449 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2450 {
2451   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2452   PetscInt    i,nz,mbs;
2453 
2454   PetscFunctionBegin;
2455   nz  = baij->maxnz;
2456   mbs = baij->mbs;
2457   for (i=0; i<nz; i++) {
2458     baij->j[i] = indices[i];
2459   }
2460   baij->nz = nz;
2461   for (i=0; i<mbs; i++) {
2462     baij->ilen[i] = baij->imax[i];
2463   }
2464   PetscFunctionReturn(0);
2465 }
2466 EXTERN_C_END
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2470 /*@
2471     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2472        in the matrix.
2473 
2474   Input Parameters:
2475 +  mat - the SeqBAIJ matrix
2476 -  indices - the column indices
2477 
2478   Level: advanced
2479 
2480   Notes:
2481     This can be called if you have precomputed the nonzero structure of the
2482   matrix and want to provide it to the matrix object to improve the performance
2483   of the MatSetValues() operation.
2484 
2485     You MUST have set the correct numbers of nonzeros per row in the call to
2486   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2487 
2488     MUST be called before any calls to MatSetValues();
2489 
2490 @*/
2491 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2492 {
2493   PetscErrorCode ierr;
2494 
2495   PetscFunctionBegin;
2496   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2497   PetscValidPointer(indices,2);
2498   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2504 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2505 {
2506   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2507   PetscErrorCode ierr;
2508   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2509   PetscReal      atmp;
2510   PetscScalar    *x,zero = 0.0;
2511   MatScalar      *aa;
2512   PetscInt       ncols,brow,krow,kcol;
2513 
2514   PetscFunctionBegin;
2515   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2516   bs   = A->rmap->bs;
2517   aa   = a->a;
2518   ai   = a->i;
2519   aj   = a->j;
2520   mbs  = a->mbs;
2521 
2522   ierr = VecSet(v,zero);CHKERRQ(ierr);
2523   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2524   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2525   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2526   for (i=0; i<mbs; i++) {
2527     ncols = ai[1] - ai[0]; ai++;
2528     brow  = bs*i;
2529     for (j=0; j<ncols; j++){
2530       for (kcol=0; kcol<bs; kcol++){
2531         for (krow=0; krow<bs; krow++){
2532           atmp = PetscAbsScalar(*aa);aa++;
2533           row = brow + krow;    /* row index */
2534           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2535           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2536         }
2537       }
2538       aj++;
2539     }
2540   }
2541   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 #undef __FUNCT__
2546 #define __FUNCT__ "MatCopy_SeqBAIJ"
2547 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2548 {
2549   PetscErrorCode ierr;
2550 
2551   PetscFunctionBegin;
2552   /* If the two matrices have the same copy implementation, use fast copy. */
2553   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2554     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2555     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2556     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2557 
2558     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2559     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2560     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2561   } else {
2562     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2563   }
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2569 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2570 {
2571   PetscErrorCode ierr;
2572 
2573   PetscFunctionBegin;
2574   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2575   PetscFunctionReturn(0);
2576 }
2577 
2578 #undef __FUNCT__
2579 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2580 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2581 {
2582   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2583   PetscFunctionBegin;
2584   *array = a->a;
2585   PetscFunctionReturn(0);
2586 }
2587 
2588 #undef __FUNCT__
2589 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2590 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2591 {
2592   PetscFunctionBegin;
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2598 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2599 {
2600   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2601   PetscErrorCode ierr;
2602   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2603   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2604 
2605   PetscFunctionBegin;
2606   if (str == SAME_NONZERO_PATTERN) {
2607     PetscScalar alpha = a;
2608     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2609   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2610     if (y->xtoy && y->XtoY != X) {
2611       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2612       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2613     }
2614     if (!y->xtoy) { /* get xtoy */
2615       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2616       y->XtoY = X;
2617       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2618     }
2619     bs2 = bs*bs;
2620     for (i=0; i<x->nz; i++) {
2621       j = 0;
2622       while (j < bs2){
2623         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2624         j++;
2625       }
2626     }
2627     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2628   } else {
2629     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2630   }
2631   PetscFunctionReturn(0);
2632 }
2633 
2634 #undef __FUNCT__
2635 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2636 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2637 {
2638   PetscInt rbs,cbs;
2639   PetscErrorCode ierr;
2640 
2641   PetscFunctionBegin;
2642   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2643   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2644   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2645   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2646   PetscFunctionReturn(0);
2647 }
2648 
2649 #undef __FUNCT__
2650 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2651 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2652 {
2653   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2654   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2655   MatScalar      *aa = a->a;
2656 
2657   PetscFunctionBegin;
2658   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2659   PetscFunctionReturn(0);
2660 }
2661 
2662 #undef __FUNCT__
2663 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2664 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2665 {
2666   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2667   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2668   MatScalar      *aa = a->a;
2669 
2670   PetscFunctionBegin;
2671   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2672   PetscFunctionReturn(0);
2673 }
2674 
2675 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2676 
2677 #undef __FUNCT__
2678 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2679 /*
2680     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2681 */
2682 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2683 {
2684   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2685   PetscErrorCode ierr;
2686   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2687   PetscInt       nz = a->i[m],row,*jj,mr,col;
2688 
2689   PetscFunctionBegin;
2690   *nn = n;
2691   if (!ia) PetscFunctionReturn(0);
2692   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2693   else {
2694     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2695     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2697     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2698     jj = a->j;
2699     for (i=0; i<nz; i++) {
2700       collengths[jj[i]]++;
2701     }
2702     cia[0] = oshift;
2703     for (i=0; i<n; i++) {
2704       cia[i+1] = cia[i] + collengths[i];
2705     }
2706     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2707     jj   = a->j;
2708     for (row=0; row<m; row++) {
2709       mr = a->i[row+1] - a->i[row];
2710       for (i=0; i<mr; i++) {
2711         col = *jj++;
2712         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2713       }
2714     }
2715     ierr = PetscFree(collengths);CHKERRQ(ierr);
2716     *ia = cia; *ja = cja;
2717   }
2718   PetscFunctionReturn(0);
2719 }
2720 
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2723 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2724 {
2725   PetscErrorCode ierr;
2726 
2727   PetscFunctionBegin;
2728   if (!ia) PetscFunctionReturn(0);
2729   ierr = PetscFree(*ia);CHKERRQ(ierr);
2730   ierr = PetscFree(*ja);CHKERRQ(ierr);
2731   PetscFunctionReturn(0);
2732 }
2733 
2734 #undef __FUNCT__
2735 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2736 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2737 {
2738   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2739   PetscErrorCode ierr;
2740   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2741   PetscScalar    dx,*y,*xx,*w3_array;
2742   PetscScalar    *vscale_array;
2743   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2744   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2745   void           *fctx = coloring->fctx;
2746   PetscBool      flg = PETSC_FALSE;
2747   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2748   Vec            x1_tmp;
2749 
2750   PetscFunctionBegin;
2751   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2752   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2753   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2754   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2755 
2756   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2757   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2758   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2759   if (flg) {
2760     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2761   } else {
2762     PetscBool  assembled;
2763     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2764     if (assembled) {
2765       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2766     }
2767   }
2768 
2769   x1_tmp = x1;
2770   if (!coloring->vscale){
2771     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2772   }
2773 
2774   /*
2775     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2776     coloring->F for the coarser grids from the finest
2777   */
2778   if (coloring->F) {
2779     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2780     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2781     if (m1 != m2) {
2782       coloring->F = 0;
2783       }
2784     }
2785 
2786   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2787     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2788   }
2789   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2790 
2791   /* Set w1 = F(x1) */
2792   if (coloring->F) {
2793     w1          = coloring->F; /* use already computed value of function */
2794     coloring->F = 0;
2795   } else {
2796     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2797     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2798     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2799   }
2800 
2801   if (!coloring->w3) {
2802     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2803     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2804   }
2805   w3 = coloring->w3;
2806 
2807     CHKMEMQ;
2808     /* Compute all the local scale factors, including ghost points */
2809   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2810   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2811   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2812   if (ctype == IS_COLORING_GHOSTED){
2813     col_start = 0; col_end = N;
2814   } else if (ctype == IS_COLORING_GLOBAL){
2815     xx = xx - start;
2816     vscale_array = vscale_array - start;
2817     col_start = start; col_end = N + start;
2818   }    CHKMEMQ;
2819   for (col=col_start; col<col_end; col++){
2820     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2821     if (coloring->htype[0] == 'w') {
2822       dx = 1.0 + unorm;
2823     } else {
2824       dx  = xx[col];
2825     }
2826     if (dx == 0.0) dx = 1.0;
2827 #if !defined(PETSC_USE_COMPLEX)
2828     if (dx < umin && dx >= 0.0)      dx = umin;
2829     else if (dx < 0.0 && dx > -umin) dx = -umin;
2830 #else
2831     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2832     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2833 #endif
2834     dx               *= epsilon;
2835     vscale_array[col] = 1.0/dx;
2836   }     CHKMEMQ;
2837   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2838   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2839   if (ctype == IS_COLORING_GLOBAL){
2840     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2841     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2842   }
2843   CHKMEMQ;
2844   if (coloring->vscaleforrow) {
2845     vscaleforrow = coloring->vscaleforrow;
2846   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2847 
2848   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2849   /*
2850     Loop over each color
2851   */
2852   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2853   for (k=0; k<coloring->ncolors; k++) {
2854     coloring->currentcolor = k;
2855     for (i=0; i<bs; i++) {
2856       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2857       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2858       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2859       /*
2860 	Loop over each column associated with color
2861 	adding the perturbation to the vector w3.
2862       */
2863       for (l=0; l<coloring->ncolumns[k]; l++) {
2864 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2865 	if (coloring->htype[0] == 'w') {
2866 	  dx = 1.0 + unorm;
2867 	} else {
2868 	  dx  = xx[col];
2869 	}
2870 	if (dx == 0.0) dx = 1.0;
2871 #if !defined(PETSC_USE_COMPLEX)
2872 	if (dx < umin && dx >= 0.0)      dx = umin;
2873 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2874 #else
2875 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2876 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2877 #endif
2878 	dx            *= epsilon;
2879 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2880 	w3_array[col] += dx;
2881       }
2882       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2883       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2884 
2885       /*
2886 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2887 	w2 = F(x1 + dx) - F(x1)
2888       */
2889       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2890       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2891       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2892       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2893 
2894       /*
2895 	Loop over rows of vector, putting results into Jacobian matrix
2896       */
2897       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2898       for (l=0; l<coloring->nrows[k]; l++) {
2899 	row    = bs*coloring->rows[k][l];             /* local row index */
2900 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2901         for (j=0; j<bs; j++) {
2902   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2903           srows[j]  = row + start + j;
2904         }
2905 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2906       }
2907       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2908     }
2909   } /* endof for each color */
2910   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2911   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2912   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2913   ierr = PetscFree(srows);CHKERRQ(ierr);
2914 
2915   coloring->currentcolor = -1;
2916   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2917   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2918   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2919   PetscFunctionReturn(0);
2920 }
2921 
2922 /* -------------------------------------------------------------------*/
2923 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2924        MatGetRow_SeqBAIJ,
2925        MatRestoreRow_SeqBAIJ,
2926        MatMult_SeqBAIJ_N,
2927 /* 4*/ MatMultAdd_SeqBAIJ_N,
2928        MatMultTranspose_SeqBAIJ,
2929        MatMultTransposeAdd_SeqBAIJ,
2930        0,
2931        0,
2932        0,
2933 /*10*/ 0,
2934        MatLUFactor_SeqBAIJ,
2935        0,
2936        0,
2937        MatTranspose_SeqBAIJ,
2938 /*15*/ MatGetInfo_SeqBAIJ,
2939        MatEqual_SeqBAIJ,
2940        MatGetDiagonal_SeqBAIJ,
2941        MatDiagonalScale_SeqBAIJ,
2942        MatNorm_SeqBAIJ,
2943 /*20*/ 0,
2944        MatAssemblyEnd_SeqBAIJ,
2945        MatSetOption_SeqBAIJ,
2946        MatZeroEntries_SeqBAIJ,
2947 /*24*/ MatZeroRows_SeqBAIJ,
2948        0,
2949        0,
2950        0,
2951        0,
2952 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2953        0,
2954        0,
2955        MatGetArray_SeqBAIJ,
2956        MatRestoreArray_SeqBAIJ,
2957 /*34*/ MatDuplicate_SeqBAIJ,
2958        0,
2959        0,
2960        MatILUFactor_SeqBAIJ,
2961        0,
2962 /*39*/ MatAXPY_SeqBAIJ,
2963        MatGetSubMatrices_SeqBAIJ,
2964        MatIncreaseOverlap_SeqBAIJ,
2965        MatGetValues_SeqBAIJ,
2966        MatCopy_SeqBAIJ,
2967 /*44*/ 0,
2968        MatScale_SeqBAIJ,
2969        0,
2970        0,
2971        MatZeroRowsColumns_SeqBAIJ,
2972 /*49*/ MatSetBlockSize_SeqBAIJ,
2973        MatGetRowIJ_SeqBAIJ,
2974        MatRestoreRowIJ_SeqBAIJ,
2975        MatGetColumnIJ_SeqBAIJ,
2976        MatRestoreColumnIJ_SeqBAIJ,
2977 /*54*/ MatFDColoringCreate_SeqAIJ,
2978        0,
2979        0,
2980        0,
2981        MatSetValuesBlocked_SeqBAIJ,
2982 /*59*/ MatGetSubMatrix_SeqBAIJ,
2983        MatDestroy_SeqBAIJ,
2984        MatView_SeqBAIJ,
2985        0,
2986        0,
2987 /*64*/ 0,
2988        0,
2989        0,
2990        0,
2991        0,
2992 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2993        0,
2994        MatConvert_Basic,
2995        0,
2996        0,
2997 /*74*/ 0,
2998        MatFDColoringApply_BAIJ,
2999        0,
3000        0,
3001        0,
3002 /*79*/ 0,
3003        0,
3004        0,
3005        0,
3006        MatLoad_SeqBAIJ,
3007 /*84*/ 0,
3008        0,
3009        0,
3010        0,
3011        0,
3012 /*89*/ 0,
3013        0,
3014        0,
3015        0,
3016        0,
3017 /*94*/ 0,
3018        0,
3019        0,
3020        0,
3021        0,
3022 /*99*/0,
3023        0,
3024        0,
3025        0,
3026        0,
3027 /*104*/0,
3028        MatRealPart_SeqBAIJ,
3029        MatImaginaryPart_SeqBAIJ,
3030        0,
3031        0,
3032 /*109*/0,
3033        0,
3034        0,
3035        0,
3036        MatMissingDiagonal_SeqBAIJ,
3037 /*114*/0,
3038        0,
3039        0,
3040        0,
3041        0,
3042 /*119*/0,
3043        0,
3044        MatMultHermitianTranspose_SeqBAIJ,
3045        MatMultHermitianTransposeAdd_SeqBAIJ,
3046        0,
3047 };
3048 
3049 EXTERN_C_BEGIN
3050 #undef __FUNCT__
3051 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3052 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3053 {
3054   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3055   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3056   PetscErrorCode ierr;
3057 
3058   PetscFunctionBegin;
3059   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3060 
3061   /* allocate space for values if not already there */
3062   if (!aij->saved_values) {
3063     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3064     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3065   }
3066 
3067   /* copy values over */
3068   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3069   PetscFunctionReturn(0);
3070 }
3071 EXTERN_C_END
3072 
3073 EXTERN_C_BEGIN
3074 #undef __FUNCT__
3075 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3076 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3077 {
3078   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3079   PetscErrorCode ierr;
3080   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3081 
3082   PetscFunctionBegin;
3083   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3084   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3085 
3086   /* copy values over */
3087   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3088   PetscFunctionReturn(0);
3089 }
3090 EXTERN_C_END
3091 
3092 EXTERN_C_BEGIN
3093 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3094 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3095 EXTERN_C_END
3096 
3097 EXTERN_C_BEGIN
3098 #undef __FUNCT__
3099 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3100 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3101 {
3102   Mat_SeqBAIJ    *b;
3103   PetscErrorCode ierr;
3104   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3105   PetscBool      flg,skipallocation = PETSC_FALSE;
3106 
3107   PetscFunctionBegin;
3108 
3109   if (nz == MAT_SKIP_ALLOCATION) {
3110     skipallocation = PETSC_TRUE;
3111     nz             = 0;
3112   }
3113 
3114   if (bs < 0) {
3115     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3116       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3117     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3118     bs   = PetscAbs(bs);
3119   }
3120   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3121   bs   = newbs;
3122 
3123   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3124   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3125   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3126   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3127 
3128   B->preallocated = PETSC_TRUE;
3129 
3130   mbs  = B->rmap->n/bs;
3131   nbs  = B->cmap->n/bs;
3132   bs2  = bs*bs;
3133 
3134   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3135 
3136   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3137   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3138   if (nnz) {
3139     for (i=0; i<mbs; i++) {
3140       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3141       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3142     }
3143   }
3144 
3145   b       = (Mat_SeqBAIJ*)B->data;
3146   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3147     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3148   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3149 
3150   if (!flg) {
3151     switch (bs) {
3152     case 1:
3153       B->ops->mult            = MatMult_SeqBAIJ_1;
3154       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3155       B->ops->sor             = MatSOR_SeqBAIJ_1;
3156       break;
3157     case 2:
3158       B->ops->mult            = MatMult_SeqBAIJ_2;
3159       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3160       B->ops->sor             = MatSOR_SeqBAIJ_2;
3161       break;
3162     case 3:
3163       B->ops->mult            = MatMult_SeqBAIJ_3;
3164       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3165       B->ops->sor             = MatSOR_SeqBAIJ_3;
3166       break;
3167     case 4:
3168       B->ops->mult            = MatMult_SeqBAIJ_4;
3169       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3170       B->ops->sor             = MatSOR_SeqBAIJ_4;
3171       break;
3172     case 5:
3173       B->ops->mult            = MatMult_SeqBAIJ_5;
3174       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3175       B->ops->sor             = MatSOR_SeqBAIJ_5;
3176       break;
3177     case 6:
3178       B->ops->mult            = MatMult_SeqBAIJ_6;
3179       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3180       B->ops->sor             = MatSOR_SeqBAIJ_6;
3181       break;
3182     case 7:
3183       B->ops->mult            = MatMult_SeqBAIJ_7;
3184       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3185       B->ops->sor             = MatSOR_SeqBAIJ_7;
3186       break;
3187     case 15:
3188       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3189       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3190       B->ops->sor             = MatSOR_SeqBAIJ_N;
3191       break;
3192     default:
3193       B->ops->mult            = MatMult_SeqBAIJ_N;
3194       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3195       B->ops->sor             = MatSOR_SeqBAIJ_N;
3196       break;
3197     }
3198   }
3199   B->rmap->bs  = bs;
3200   b->mbs       = mbs;
3201   b->nbs       = nbs;
3202   if (!skipallocation) {
3203     if (!b->imax) {
3204       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3205       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3206       b->free_imax_ilen = PETSC_TRUE;
3207     }
3208     /* b->ilen will count nonzeros in each block row so far. */
3209     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3210     if (!nnz) {
3211       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3212       else if (nz < 0) nz = 1;
3213       for (i=0; i<mbs; i++) b->imax[i] = nz;
3214       nz = nz*mbs;
3215     } else {
3216       nz = 0;
3217       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3218     }
3219 
3220     /* allocate the matrix space */
3221     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3222     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3223     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3224     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3225     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3226     b->singlemalloc = PETSC_TRUE;
3227     b->i[0] = 0;
3228     for (i=1; i<mbs+1; i++) {
3229       b->i[i] = b->i[i-1] + b->imax[i-1];
3230     }
3231     b->free_a     = PETSC_TRUE;
3232     b->free_ij    = PETSC_TRUE;
3233   } else {
3234     b->free_a     = PETSC_FALSE;
3235     b->free_ij    = PETSC_FALSE;
3236   }
3237 
3238   B->rmap->bs          = bs;
3239   b->bs2              = bs2;
3240   b->mbs              = mbs;
3241   b->nz               = 0;
3242   b->maxnz            = nz;
3243   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3244   PetscFunctionReturn(0);
3245 }
3246 EXTERN_C_END
3247 
3248 EXTERN_C_BEGIN
3249 #undef __FUNCT__
3250 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3251 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3252 {
3253   PetscInt       i,m,nz,nz_max=0,*nnz;
3254   PetscScalar    *values=0;
3255   PetscErrorCode ierr;
3256 
3257   PetscFunctionBegin;
3258   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3259   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3260   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3261   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3262   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3263   m = B->rmap->n/bs;
3264 
3265   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3266   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3267   for(i=0; i<m; i++) {
3268     nz = ii[i+1]- ii[i];
3269     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3270     nz_max = PetscMax(nz_max, nz);
3271     nnz[i] = nz;
3272   }
3273   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3274   ierr = PetscFree(nnz);CHKERRQ(ierr);
3275 
3276   values = (PetscScalar*)V;
3277   if (!values) {
3278     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3279     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3280   }
3281   for (i=0; i<m; i++) {
3282     PetscInt          ncols  = ii[i+1] - ii[i];
3283     const PetscInt    *icols = jj + ii[i];
3284     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3285     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3286   }
3287   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3288   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3289   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3290 
3291   PetscFunctionReturn(0);
3292 }
3293 EXTERN_C_END
3294 
3295 
3296 EXTERN_C_BEGIN
3297 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3298 #if defined(PETSC_HAVE_MUMPS)
3299 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3300 #endif
3301 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3302 EXTERN_C_END
3303 
3304 /*MC
3305    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3306    block sparse compressed row format.
3307 
3308    Options Database Keys:
3309 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3310 
3311   Level: beginner
3312 
3313 .seealso: MatCreateSeqBAIJ()
3314 M*/
3315 
3316 
3317 EXTERN_C_BEGIN
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatCreate_SeqBAIJ"
3320 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3321 {
3322   PetscErrorCode ierr;
3323   PetscMPIInt    size;
3324   Mat_SeqBAIJ    *b;
3325 
3326   PetscFunctionBegin;
3327   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3328   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3329 
3330   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3331   B->data = (void*)b;
3332   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3333   b->row                   = 0;
3334   b->col                   = 0;
3335   b->icol                  = 0;
3336   b->reallocs              = 0;
3337   b->saved_values          = 0;
3338 
3339   b->roworiented           = PETSC_TRUE;
3340   b->nonew                 = 0;
3341   b->diag                  = 0;
3342   b->solve_work            = 0;
3343   b->mult_work             = 0;
3344   B->spptr                 = 0;
3345   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3346   b->keepnonzeropattern    = PETSC_FALSE;
3347   b->xtoy                  = 0;
3348   b->XtoY                  = 0;
3349   B->same_nonzero          = PETSC_FALSE;
3350 
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3352                                      "MatGetFactorAvailable_seqbaij_petsc",
3353                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3354   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3355                                      "MatGetFactor_seqbaij_petsc",
3356                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3357 #if defined(PETSC_HAVE_MUMPS)
3358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3359 #endif
3360   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3361                                      "MatInvertBlockDiagonal_SeqBAIJ",
3362                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3363   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3364                                      "MatStoreValues_SeqBAIJ",
3365                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3366   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3367                                      "MatRetrieveValues_SeqBAIJ",
3368                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3369   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3370                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3371                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3372   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3373                                      "MatConvert_SeqBAIJ_SeqAIJ",
3374                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3375   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3376                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3377                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3378   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3379                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3380                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3381   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3382                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3383                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3384   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3385   PetscFunctionReturn(0);
3386 }
3387 EXTERN_C_END
3388 
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3391 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3392 {
3393   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3394   PetscErrorCode ierr;
3395   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3396 
3397   PetscFunctionBegin;
3398   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3399 
3400   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3401     c->imax = a->imax;
3402     c->ilen = a->ilen;
3403     c->free_imax_ilen = PETSC_FALSE;
3404   } else {
3405     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3406     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3407     for (i=0; i<mbs; i++) {
3408       c->imax[i] = a->imax[i];
3409       c->ilen[i] = a->ilen[i];
3410     }
3411     c->free_imax_ilen = PETSC_TRUE;
3412   }
3413 
3414   /* allocate the matrix space */
3415   if (mallocmatspace){
3416     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3417       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3418       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3419       c->singlemalloc = PETSC_FALSE;
3420       c->free_ij      = PETSC_FALSE;
3421       c->i            = a->i;
3422       c->j            = a->j;
3423       c->parent       = A;
3424       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3425       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3426       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3427     } else {
3428       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3429       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3430       c->singlemalloc = PETSC_TRUE;
3431       c->free_ij      = PETSC_TRUE;
3432       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3433       if (mbs > 0) {
3434 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3435 	if (cpvalues == MAT_COPY_VALUES) {
3436 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3437 	} else {
3438 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3439 	}
3440       }
3441     }
3442   }
3443 
3444   c->roworiented = a->roworiented;
3445   c->nonew       = a->nonew;
3446   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3447   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3448   c->bs2         = a->bs2;
3449   c->mbs         = a->mbs;
3450   c->nbs         = a->nbs;
3451 
3452   if (a->diag) {
3453     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3454       c->diag      = a->diag;
3455       c->free_diag = PETSC_FALSE;
3456     } else {
3457       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3458       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3459       for (i=0; i<mbs; i++) {
3460         c->diag[i] = a->diag[i];
3461       }
3462       c->free_diag = PETSC_TRUE;
3463     }
3464   } else c->diag        = 0;
3465   c->nz                 = a->nz;
3466   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3467   c->solve_work         = 0;
3468   c->mult_work          = 0;
3469   c->free_a             = PETSC_TRUE;
3470   c->free_ij            = PETSC_TRUE;
3471   C->preallocated       = PETSC_TRUE;
3472   C->assembled          = PETSC_TRUE;
3473 
3474   c->compressedrow.use     = a->compressedrow.use;
3475   c->compressedrow.nrows   = a->compressedrow.nrows;
3476   c->compressedrow.check   = a->compressedrow.check;
3477   if (a->compressedrow.use){
3478     i = a->compressedrow.nrows;
3479     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3480     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3481     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3482     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3483   } else {
3484     c->compressedrow.use    = PETSC_FALSE;
3485     c->compressedrow.i      = PETSC_NULL;
3486     c->compressedrow.rindex = PETSC_NULL;
3487   }
3488   C->same_nonzero = A->same_nonzero;
3489   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3490   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3491   PetscFunctionReturn(0);
3492 }
3493 
3494 #undef __FUNCT__
3495 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3496 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3497 {
3498     PetscErrorCode ierr;
3499 
3500   PetscFunctionBegin;
3501   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3502   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3503   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3504   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3505   PetscFunctionReturn(0);
3506 }
3507 
3508 #undef __FUNCT__
3509 #define __FUNCT__ "MatLoad_SeqBAIJ"
3510 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3511 {
3512   Mat_SeqBAIJ    *a;
3513   PetscErrorCode ierr;
3514   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3515   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3516   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3517   PetscInt       *masked,nmask,tmp,bs2,ishift;
3518   PetscMPIInt    size;
3519   int            fd;
3520   PetscScalar    *aa;
3521   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3522 
3523   PetscFunctionBegin;
3524   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3525   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3526   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3527   bs2  = bs*bs;
3528 
3529   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3530   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3531   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3532   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3533   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3534   M = header[1]; N = header[2]; nz = header[3];
3535 
3536   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3537   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3538 
3539   /*
3540      This code adds extra rows to make sure the number of rows is
3541     divisible by the blocksize
3542   */
3543   mbs        = M/bs;
3544   extra_rows = bs - M + bs*(mbs);
3545   if (extra_rows == bs) extra_rows = 0;
3546   else                  mbs++;
3547   if (extra_rows) {
3548     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3549   }
3550 
3551   /* Set global sizes if not already set */
3552   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3553     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3554   } else { /* Check if the matrix global sizes are correct */
3555     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3556     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3557   }
3558 
3559   /* read in row lengths */
3560   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3561   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3562   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3563 
3564   /* read in column indices */
3565   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3566   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3567   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3568 
3569   /* loop over row lengths determining block row lengths */
3570   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3571   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3572   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3573   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3574   rowcount = 0;
3575   nzcount = 0;
3576   for (i=0; i<mbs; i++) {
3577     nmask = 0;
3578     for (j=0; j<bs; j++) {
3579       kmax = rowlengths[rowcount];
3580       for (k=0; k<kmax; k++) {
3581         tmp = jj[nzcount++]/bs;
3582         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3583       }
3584       rowcount++;
3585     }
3586     browlengths[i] += nmask;
3587     /* zero out the mask elements we set */
3588     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3589   }
3590 
3591   /* Do preallocation  */
3592   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3593   a = (Mat_SeqBAIJ*)newmat->data;
3594 
3595   /* set matrix "i" values */
3596   a->i[0] = 0;
3597   for (i=1; i<= mbs; i++) {
3598     a->i[i]      = a->i[i-1] + browlengths[i-1];
3599     a->ilen[i-1] = browlengths[i-1];
3600   }
3601   a->nz         = 0;
3602   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3603 
3604   /* read in nonzero values */
3605   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3606   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3607   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3608 
3609   /* set "a" and "j" values into matrix */
3610   nzcount = 0; jcount = 0;
3611   for (i=0; i<mbs; i++) {
3612     nzcountb = nzcount;
3613     nmask    = 0;
3614     for (j=0; j<bs; j++) {
3615       kmax = rowlengths[i*bs+j];
3616       for (k=0; k<kmax; k++) {
3617         tmp = jj[nzcount++]/bs;
3618 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3619       }
3620     }
3621     /* sort the masked values */
3622     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3623 
3624     /* set "j" values into matrix */
3625     maskcount = 1;
3626     for (j=0; j<nmask; j++) {
3627       a->j[jcount++]  = masked[j];
3628       mask[masked[j]] = maskcount++;
3629     }
3630     /* set "a" values into matrix */
3631     ishift = bs2*a->i[i];
3632     for (j=0; j<bs; j++) {
3633       kmax = rowlengths[i*bs+j];
3634       for (k=0; k<kmax; k++) {
3635         tmp       = jj[nzcountb]/bs ;
3636         block     = mask[tmp] - 1;
3637         point     = jj[nzcountb] - bs*tmp;
3638         idx       = ishift + bs2*block + j + bs*point;
3639         a->a[idx] = (MatScalar)aa[nzcountb++];
3640       }
3641     }
3642     /* zero out the mask elements we set */
3643     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3644   }
3645   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3646 
3647   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3648   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3649   ierr = PetscFree(aa);CHKERRQ(ierr);
3650   ierr = PetscFree(jj);CHKERRQ(ierr);
3651   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3652 
3653   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3654   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3655   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3656   PetscFunctionReturn(0);
3657 }
3658 
3659 #undef __FUNCT__
3660 #define __FUNCT__ "MatCreateSeqBAIJ"
3661 /*@C
3662    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3663    compressed row) format.  For good matrix assembly performance the
3664    user should preallocate the matrix storage by setting the parameter nz
3665    (or the array nnz).  By setting these parameters accurately, performance
3666    during matrix assembly can be increased by more than a factor of 50.
3667 
3668    Collective on MPI_Comm
3669 
3670    Input Parameters:
3671 +  comm - MPI communicator, set to PETSC_COMM_SELF
3672 .  bs - size of block
3673 .  m - number of rows
3674 .  n - number of columns
3675 .  nz - number of nonzero blocks  per block row (same for all rows)
3676 -  nnz - array containing the number of nonzero blocks in the various block rows
3677          (possibly different for each block row) or PETSC_NULL
3678 
3679    Output Parameter:
3680 .  A - the matrix
3681 
3682    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3683    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3684    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3685 
3686    Options Database Keys:
3687 .   -mat_no_unroll - uses code that does not unroll the loops in the
3688                      block calculations (much slower)
3689 .    -mat_block_size - size of the blocks to use
3690 
3691    Level: intermediate
3692 
3693    Notes:
3694    The number of rows and columns must be divisible by blocksize.
3695 
3696    If the nnz parameter is given then the nz parameter is ignored
3697 
3698    A nonzero block is any block that as 1 or more nonzeros in it
3699 
3700    The block AIJ format is fully compatible with standard Fortran 77
3701    storage.  That is, the stored row and column indices can begin at
3702    either one (as in Fortran) or zero.  See the users' manual for details.
3703 
3704    Specify the preallocated storage with either nz or nnz (not both).
3705    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3706    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3707    matrices.
3708 
3709 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3710 @*/
3711 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3712 {
3713   PetscErrorCode ierr;
3714 
3715   PetscFunctionBegin;
3716   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3717   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3718   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3719   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3720   PetscFunctionReturn(0);
3721 }
3722 
3723 #undef __FUNCT__
3724 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3725 /*@C
3726    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3727    per row in the matrix. For good matrix assembly performance the
3728    user should preallocate the matrix storage by setting the parameter nz
3729    (or the array nnz).  By setting these parameters accurately, performance
3730    during matrix assembly can be increased by more than a factor of 50.
3731 
3732    Collective on MPI_Comm
3733 
3734    Input Parameters:
3735 +  A - the matrix
3736 .  bs - size of block
3737 .  nz - number of block nonzeros per block row (same for all rows)
3738 -  nnz - array containing the number of block nonzeros in the various block rows
3739          (possibly different for each block row) or PETSC_NULL
3740 
3741    Options Database Keys:
3742 .   -mat_no_unroll - uses code that does not unroll the loops in the
3743                      block calculations (much slower)
3744 .    -mat_block_size - size of the blocks to use
3745 
3746    Level: intermediate
3747 
3748    Notes:
3749    If the nnz parameter is given then the nz parameter is ignored
3750 
3751    You can call MatGetInfo() to get information on how effective the preallocation was;
3752    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3753    You can also run with the option -info and look for messages with the string
3754    malloc in them to see if additional memory allocation was needed.
3755 
3756    The block AIJ format is fully compatible with standard Fortran 77
3757    storage.  That is, the stored row and column indices can begin at
3758    either one (as in Fortran) or zero.  See the users' manual for details.
3759 
3760    Specify the preallocated storage with either nz or nnz (not both).
3761    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3762    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3763 
3764 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3765 @*/
3766 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3767 {
3768   PetscErrorCode ierr;
3769 
3770   PetscFunctionBegin;
3771   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3772   PetscFunctionReturn(0);
3773 }
3774 
3775 #undef __FUNCT__
3776 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3777 /*@C
3778    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3779    (the default sequential PETSc format).
3780 
3781    Collective on MPI_Comm
3782 
3783    Input Parameters:
3784 +  A - the matrix
3785 .  i - the indices into j for the start of each local row (starts with zero)
3786 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3787 -  v - optional values in the matrix
3788 
3789    Level: developer
3790 
3791 .keywords: matrix, aij, compressed row, sparse
3792 
3793 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3794 @*/
3795 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3796 {
3797   PetscErrorCode ierr;
3798 
3799   PetscFunctionBegin;
3800   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3801   PetscFunctionReturn(0);
3802 }
3803 
3804 
3805 #undef __FUNCT__
3806 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3807 /*@
3808      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3809 
3810      Collective on MPI_Comm
3811 
3812    Input Parameters:
3813 +  comm - must be an MPI communicator of size 1
3814 .  bs - size of block
3815 .  m - number of rows
3816 .  n - number of columns
3817 .  i - row indices
3818 .  j - column indices
3819 -  a - matrix values
3820 
3821    Output Parameter:
3822 .  mat - the matrix
3823 
3824    Level: advanced
3825 
3826    Notes:
3827        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3828     once the matrix is destroyed
3829 
3830        You cannot set new nonzero locations into this matrix, that will generate an error.
3831 
3832        The i and j indices are 0 based
3833 
3834        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3835 
3836 
3837 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3838 
3839 @*/
3840 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3841 {
3842   PetscErrorCode ierr;
3843   PetscInt       ii;
3844   Mat_SeqBAIJ    *baij;
3845 
3846   PetscFunctionBegin;
3847   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3848   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3849 
3850   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3851   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3852   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3853   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3854   baij = (Mat_SeqBAIJ*)(*mat)->data;
3855   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3856   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3857 
3858   baij->i = i;
3859   baij->j = j;
3860   baij->a = a;
3861   baij->singlemalloc = PETSC_FALSE;
3862   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3863   baij->free_a       = PETSC_FALSE;
3864   baij->free_ij       = PETSC_FALSE;
3865 
3866   for (ii=0; ii<m; ii++) {
3867     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3868 #if defined(PETSC_USE_DEBUG)
3869     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3870 #endif
3871   }
3872 #if defined(PETSC_USE_DEBUG)
3873   for (ii=0; ii<baij->i[m]; ii++) {
3874     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3875     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3876   }
3877 #endif
3878 
3879   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3880   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3881   PetscFunctionReturn(0);
3882 }
3883