xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d61e7eb36d836a8e2bf1ad3f41344afcd0d5cf20)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31   ierr = PetscUseMethod(mat,"MatSeqBAIJInvertBlockDiagonal_C",(Mat),(mat));CHKERRQ(ierr);
32   PetscFunctionReturn(0);
33 }
34 
35 EXTERN_C_BEGIN
36 #undef __FUNCT__
37 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
38 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
39 {
40   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
41   PetscErrorCode ierr;
42   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
43   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
44   PetscReal      shift = 0.0;
45 
46   PetscFunctionBegin;
47   if (a->idiagvalid) PetscFunctionReturn(0);
48   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
49   diag_offset = a->diag;
50   if (!a->idiag) {
51     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
52     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
53   }
54   diag  = a->idiag;
55   mdiag = a->idiag+bs2*mbs;
56   /* factor and invert each block */
57   switch (bs){
58     case 1:
59       for (i=0; i<mbs; i++) {
60         odiag = v + 1*diag_offset[i];
61         diag[0]  = odiag[0];
62         mdiag[0] = odiag[0];
63         diag[0]  = 1.0 / (diag[0] + shift);
64         diag    += 1;
65         mdiag   += 1;
66       }
67       break;
68     case 2:
69       for (i=0; i<mbs; i++) {
70         odiag   = v + 4*diag_offset[i];
71         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
72 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
73 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
74 	diag    += 4;
75 	mdiag   += 4;
76       }
77       break;
78     case 3:
79       for (i=0; i<mbs; i++) {
80         odiag    = v + 9*diag_offset[i];
81         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
82         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
83         diag[8]  = odiag[8];
84         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
85         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
86         mdiag[8] = odiag[8];
87 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
88 	diag    += 9;
89 	mdiag   += 9;
90       }
91       break;
92     case 4:
93       for (i=0; i<mbs; i++) {
94         odiag  = v + 16*diag_offset[i];
95         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
97 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
98 	diag  += 16;
99 	mdiag += 16;
100       }
101       break;
102     case 5:
103       for (i=0; i<mbs; i++) {
104         odiag = v + 25*diag_offset[i];
105         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
107 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
108 	diag  += 25;
109 	mdiag += 25;
110       }
111       break;
112     case 6:
113       for (i=0; i<mbs; i++) {
114         odiag = v + 36*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
117 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
118 	diag  += 36;
119 	mdiag += 36;
120       }
121       break;
122     case 7:
123       for (i=0; i<mbs; i++) {
124         odiag = v + 49*diag_offset[i];
125         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
126         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
127 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
128 	diag  += 49;
129 	mdiag += 49;
130       }
131       break;
132     default:
133       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
134       for (i=0; i<mbs; i++) {
135         odiag = v + bs2*diag_offset[i];
136         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
137         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
138         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
139 	diag  += bs2;
140 	mdiag += bs2;
141       }
142       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
143   }
144   a->idiagvalid = PETSC_TRUE;
145   PetscFunctionReturn(0);
146 }
147 EXTERN_C_END
148 
149 #undef __FUNCT__
150 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
151 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
152 {
153   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
154   PetscScalar        *x,x1,s1;
155   const PetscScalar  *b;
156   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
157   PetscErrorCode     ierr;
158   PetscInt           m = a->mbs,i,i2,nz,j;
159   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
160 
161   PetscFunctionBegin;
162   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
163   its = its*lits;
164   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
165   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
166   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
167   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
168   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
169 
170   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
171 
172   diag  = a->diag;
173   idiag = a->idiag;
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
179       x[0] = b[0]*idiag[0];
180       i2     = 1;
181       idiag += 1;
182       for (i=1; i<m; i++) {
183         v     = aa + ai[i];
184         vi    = aj + ai[i];
185         nz    = diag[i] - ai[i];
186         s1    = b[i2];
187         for (j=0; j<nz; j++) {
188           s1 -= v[j]*x[vi[j]];
189         }
190         x[i2]   = idiag[0]*s1;
191         idiag   += 1;
192         i2      += 1;
193       }
194       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
195       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
196     }
197     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
198         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
199       i2    = 0;
200       mdiag = a->idiag+a->mbs;
201       for (i=0; i<m; i++) {
202         x1      = x[i2];
203         x[i2]   = mdiag[0]*x1;
204         mdiag  += 1;
205         i2     += 1;
206       }
207       ierr = PetscLogFlops(m);CHKERRQ(ierr);
208     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
209       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
210     }
211     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
212       idiag   = a->idiag+a->mbs - 1;
213       i2      = m - 1;
214       x1      = x[i2];
215       x[i2]   = idiag[0]*x1;
216       idiag -= 1;
217       i2    -= 1;
218       for (i=m-2; i>=0; i--) {
219         v     = aa + (diag[i]+1);
220         vi    = aj + diag[i] + 1;
221         nz    = ai[i+1] - diag[i] - 1;
222         s1    = x[i2];
223         for (j=0; j<nz; j++) {
224           s1 -= v[j]*x[vi[j]];
225         }
226         x[i2]   = idiag[0]*s1;
227         idiag   -= 1;
228         i2      -= 1;
229       }
230       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
231     }
232   } else {
233     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
234   }
235   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
236   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
237   PetscFunctionReturn(0);
238 }
239 
240 #undef __FUNCT__
241 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
242 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
243 {
244   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
245   PetscScalar        *x,x1,x2,s1,s2;
246   const PetscScalar  *b;
247   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
248   PetscErrorCode     ierr;
249   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
250   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
251 
252   PetscFunctionBegin;
253   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
254   its = its*lits;
255   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
256   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
257   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
258   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
259   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
260 
261   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
262 
263   diag  = a->diag;
264   idiag = a->idiag;
265   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
266   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
267 
268   if (flag & SOR_ZERO_INITIAL_GUESS) {
269     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
270       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
271       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
272       i2     = 2;
273       idiag += 4;
274       for (i=1; i<m; i++) {
275 	v     = aa + 4*ai[i];
276 	vi    = aj + ai[i];
277 	nz    = diag[i] - ai[i];
278 	s1    = b[i2]; s2 = b[i2+1];
279         for (j=0; j<nz; j++) {
280 	  idx  = 2*vi[j];
281           it   = 4*j;
282 	  x1   = x[idx]; x2 = x[1+idx];
283 	  s1  -= v[it]*x1 + v[it+2]*x2;
284 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
285 	}
286 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
287 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
288         idiag   += 4;
289         i2      += 2;
290       }
291       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
292       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
293     }
294     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
295         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
296       i2    = 0;
297       mdiag = a->idiag+4*a->mbs;
298       for (i=0; i<m; i++) {
299         x1      = x[i2]; x2 = x[i2+1];
300         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
301         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
302         mdiag  += 4;
303         i2     += 2;
304       }
305       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
306     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
307       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
308     }
309     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
310       idiag   = a->idiag+4*a->mbs - 4;
311       i2      = 2*m - 2;
312       x1      = x[i2]; x2 = x[i2+1];
313       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
314       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
315       idiag -= 4;
316       i2    -= 2;
317       for (i=m-2; i>=0; i--) {
318 	v     = aa + 4*(diag[i]+1);
319 	vi    = aj + diag[i] + 1;
320 	nz    = ai[i+1] - diag[i] - 1;
321 	s1    = x[i2]; s2 = x[i2+1];
322         for (j=0; j<nz; j++) {
323  	  idx  = 2*vi[j];
324           it   = 4*j;
325 	  x1   = x[idx]; x2 = x[1+idx];
326 	  s1  -= v[it]*x1 + v[it+2]*x2;
327 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
328 	}
329 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
330 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
331         idiag   -= 4;
332         i2      -= 2;
333       }
334       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
335     }
336   } else {
337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
338   }
339   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
340   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
341   PetscFunctionReturn(0);
342 }
343 
344 #undef __FUNCT__
345 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
346 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
347 {
348   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
349   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
350   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
351   const PetscScalar  *b;
352   PetscErrorCode     ierr;
353   PetscInt           m = a->mbs,i,i2,nz,idx;
354   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
355 
356   PetscFunctionBegin;
357   its = its*lits;
358   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
359   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
360   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
361   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
362   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
363   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
364 
365   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
366 
367   diag  = a->diag;
368   idiag = a->idiag;
369   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
370   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
371 
372   if (flag & SOR_ZERO_INITIAL_GUESS) {
373     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
374       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
375       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
376       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
377       i2     = 3;
378       idiag += 9;
379       for (i=1; i<m; i++) {
380         v     = aa + 9*ai[i];
381         vi    = aj + ai[i];
382         nz    = diag[i] - ai[i];
383         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
384         while (nz--) {
385           idx  = 3*(*vi++);
386           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
387           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
388           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
389           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
390           v   += 9;
391         }
392         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
393         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
394         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
395         idiag   += 9;
396         i2      += 3;
397       }
398       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
399       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
400     }
401     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
402         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
403       i2    = 0;
404       mdiag = a->idiag+9*a->mbs;
405       for (i=0; i<m; i++) {
406         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
407         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
408         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
409         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
410         mdiag  += 9;
411         i2     += 3;
412       }
413       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
414     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
415       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
416     }
417     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
418       idiag   = a->idiag+9*a->mbs - 9;
419       i2      = 3*m - 3;
420       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
421       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
422       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
423       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
424       idiag -= 9;
425       i2    -= 3;
426       for (i=m-2; i>=0; i--) {
427         v     = aa + 9*(diag[i]+1);
428         vi    = aj + diag[i] + 1;
429         nz    = ai[i+1] - diag[i] - 1;
430         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
431         while (nz--) {
432           idx  = 3*(*vi++);
433           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
434           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
435           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
436           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
437           v   += 9;
438         }
439         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
440         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
441         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
442         idiag   -= 9;
443         i2      -= 3;
444       }
445       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
446     }
447   } else {
448     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
449   }
450   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
451   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
452   PetscFunctionReturn(0);
453 }
454 
455 #undef __FUNCT__
456 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
457 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
458 {
459   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
460   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
461   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
462   const PetscScalar  *b;
463   PetscErrorCode     ierr;
464   PetscInt           m = a->mbs,i,i2,nz,idx;
465   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
466 
467   PetscFunctionBegin;
468   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
469   its = its*lits;
470   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
471   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
472   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
473   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
474   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
475 
476   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
477 
478   diag  = a->diag;
479   idiag = a->idiag;
480   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
481   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
482 
483   if (flag & SOR_ZERO_INITIAL_GUESS) {
484     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
485       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
486       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
487       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
488       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
489       i2     = 4;
490       idiag += 16;
491       for (i=1; i<m; i++) {
492 	v     = aa + 16*ai[i];
493 	vi    = aj + ai[i];
494 	nz    = diag[i] - ai[i];
495 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
496 	while (nz--) {
497 	  idx  = 4*(*vi++);
498 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
499 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
500 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
501 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
502 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
503 	  v   += 16;
504 	}
505 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
506 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
507 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
508 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
509         idiag   += 16;
510         i2      += 4;
511       }
512       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
513       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
514     }
515     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
516         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
517       i2    = 0;
518       mdiag = a->idiag+16*a->mbs;
519       for (i=0; i<m; i++) {
520         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
521         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
522         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
523         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
524         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
525         mdiag  += 16;
526         i2     += 4;
527       }
528       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
529     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
530       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
531     }
532     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
533       idiag   = a->idiag+16*a->mbs - 16;
534       i2      = 4*m - 4;
535       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
536       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
537       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
538       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
539       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
540       idiag -= 16;
541       i2    -= 4;
542       for (i=m-2; i>=0; i--) {
543 	v     = aa + 16*(diag[i]+1);
544 	vi    = aj + diag[i] + 1;
545 	nz    = ai[i+1] - diag[i] - 1;
546 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
547 	while (nz--) {
548 	  idx  = 4*(*vi++);
549 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
550 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
551 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
552 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
553 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
554 	  v   += 16;
555 	}
556 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
557 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
558 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
559 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
560         idiag   -= 16;
561         i2      -= 4;
562       }
563       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
564     }
565   } else {
566     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
567   }
568   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
570   PetscFunctionReturn(0);
571 }
572 
573 #undef __FUNCT__
574 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
575 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
576 {
577   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
578   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
579   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
580   const PetscScalar  *b;
581   PetscErrorCode     ierr;
582   PetscInt           m = a->mbs,i,i2,nz,idx;
583   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
584 
585   PetscFunctionBegin;
586   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
587   its = its*lits;
588   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
589   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
590   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
591   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
592   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
593 
594   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
595 
596   diag  = a->diag;
597   idiag = a->idiag;
598   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
599   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
600 
601   if (flag & SOR_ZERO_INITIAL_GUESS) {
602     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
603       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
604       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
605       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
606       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
607       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
608       i2     = 5;
609       idiag += 25;
610       for (i=1; i<m; i++) {
611 	v     = aa + 25*ai[i];
612 	vi    = aj + ai[i];
613 	nz    = diag[i] - ai[i];
614 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
615 	while (nz--) {
616 	  idx  = 5*(*vi++);
617 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
618 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
619 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
620 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
621 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
622 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
623 	  v   += 25;
624 	}
625 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
626 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
627 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
628 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
629 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
630         idiag   += 25;
631         i2      += 5;
632       }
633       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
634       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
635     }
636     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
637         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
638       i2    = 0;
639       mdiag = a->idiag+25*a->mbs;
640       for (i=0; i<m; i++) {
641         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
642         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
643         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
644         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
645         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
646         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
647         mdiag  += 25;
648         i2     += 5;
649       }
650       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
651     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
652       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
653     }
654     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
655       idiag   = a->idiag+25*a->mbs - 25;
656       i2      = 5*m - 5;
657       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
658       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
659       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
660       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
661       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
662       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
663       idiag -= 25;
664       i2    -= 5;
665       for (i=m-2; i>=0; i--) {
666 	v     = aa + 25*(diag[i]+1);
667 	vi    = aj + diag[i] + 1;
668 	nz    = ai[i+1] - diag[i] - 1;
669 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
670 	while (nz--) {
671 	  idx  = 5*(*vi++);
672 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
673 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
674 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
675 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
676 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
677 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
678 	  v   += 25;
679 	}
680 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
681 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
682 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
683 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
684 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
685         idiag   -= 25;
686         i2      -= 5;
687       }
688       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
689     }
690   } else {
691     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
692   }
693   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
694   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
695   PetscFunctionReturn(0);
696 }
697 
698 #undef __FUNCT__
699 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
700 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
701 {
702   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
703   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
704   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
705   const PetscScalar  *b;
706   PetscErrorCode     ierr;
707   PetscInt           m = a->mbs,i,i2,nz,idx;
708   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
709 
710   PetscFunctionBegin;
711   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
712   its = its*lits;
713   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
714   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
715   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
716   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
717   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
718 
719   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
720 
721   diag  = a->diag;
722   idiag = a->idiag;
723   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
724   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
725 
726   if (flag & SOR_ZERO_INITIAL_GUESS) {
727     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
728       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
729       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
730       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
731       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
732       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
733       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
734       i2     = 6;
735       idiag += 36;
736       for (i=1; i<m; i++) {
737         v     = aa + 36*ai[i];
738         vi    = aj + ai[i];
739         nz    = diag[i] - ai[i];
740         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
741         while (nz--) {
742           idx  = 6*(*vi++);
743           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
744           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
745           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
746           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
747           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
748           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
749           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
750           v   += 36;
751         }
752         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
753         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
754         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
755         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
756         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
757         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
758         idiag   += 36;
759         i2      += 6;
760       }
761       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
762       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
763     }
764     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
765         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
766       i2    = 0;
767       mdiag = a->idiag+36*a->mbs;
768       for (i=0; i<m; i++) {
769         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
770         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
771         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
772         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
773         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
774         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
775         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
776         mdiag  += 36;
777         i2     += 6;
778       }
779       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
780     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
781       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
782     }
783     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
784       idiag   = a->idiag+36*a->mbs - 36;
785       i2      = 6*m - 6;
786       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
787       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
788       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
789       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
790       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
791       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
792       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
793       idiag -= 36;
794       i2    -= 6;
795       for (i=m-2; i>=0; i--) {
796         v     = aa + 36*(diag[i]+1);
797         vi    = aj + diag[i] + 1;
798         nz    = ai[i+1] - diag[i] - 1;
799         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
800         while (nz--) {
801           idx  = 6*(*vi++);
802           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
803           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
804           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
805           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
806           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
807           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
808           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
809           v   += 36;
810         }
811         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
812         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
813         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
814         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
815         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
816         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
817         idiag   -= 36;
818         i2      -= 6;
819       }
820       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
821     }
822   } else {
823     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
824   }
825   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
826   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
832 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
833 {
834   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
835   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
836   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
837   const PetscScalar  *b;
838   PetscErrorCode     ierr;
839   PetscInt           m = a->mbs,i,i2,nz,idx;
840   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
841 
842   PetscFunctionBegin;
843   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
844   its = its*lits;
845   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
846   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
847   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
848   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
849   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
850 
851   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
852 
853   diag  = a->diag;
854   idiag = a->idiag;
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
857 
858   if (flag & SOR_ZERO_INITIAL_GUESS) {
859     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
860       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
861       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
862       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
863       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
864       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
865       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
866       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
867       i2     = 7;
868       idiag += 49;
869       for (i=1; i<m; i++) {
870         v     = aa + 49*ai[i];
871         vi    = aj + ai[i];
872         nz    = diag[i] - ai[i];
873         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
874         while (nz--) {
875           idx  = 7*(*vi++);
876           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
877           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
878           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
879           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
880           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
881           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
882           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
883           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
884           v   += 49;
885         }
886         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
887         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
888         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
889         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
890         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
891         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
892         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
893         idiag   += 49;
894         i2      += 7;
895       }
896       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
897       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
898     }
899     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
900         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
901       i2    = 0;
902       mdiag = a->idiag+49*a->mbs;
903       for (i=0; i<m; i++) {
904         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
905         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
906         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
907         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
908         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
909         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
910         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
911         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
912         mdiag  += 49;
913         i2     += 7;
914       }
915       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
916     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
917       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
918     }
919     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
920       idiag   = a->idiag+49*a->mbs - 49;
921       i2      = 7*m - 7;
922       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
923       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
924       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
925       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
926       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
927       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
928       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
929       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
930       idiag -= 49;
931       i2    -= 7;
932       for (i=m-2; i>=0; i--) {
933         v     = aa + 49*(diag[i]+1);
934         vi    = aj + diag[i] + 1;
935         nz    = ai[i+1] - diag[i] - 1;
936         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
937         while (nz--) {
938           idx  = 7*(*vi++);
939           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
940           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
941           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
942           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
943           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
944           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
945           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
946           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
947           v   += 49;
948         }
949         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
950         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
951         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
952         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
953         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
954         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
955         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
956         idiag   -= 49;
957         i2      -= 7;
958       }
959       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
960     }
961   } else {
962     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
963   }
964   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
971 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
972 {
973   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
974   PetscScalar        *x,*work,*w,*workt;
975   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
976   const PetscScalar  *b;
977   PetscErrorCode     ierr;
978   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
979   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
980 
981   PetscFunctionBegin;
982   its = its*lits;
983   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
984   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
985   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
986   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
987   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
988   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
989 
990   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
991 
992   diag  = a->diag;
993   idiag = a->idiag;
994   if (!a->mult_work) {
995     k    = PetscMax(A->rmap->n,A->cmap->n);
996     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
997   }
998   work = a->mult_work;
999   if (!a->sor_work) {
1000     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1001   }
1002   w = a->sor_work;
1003 
1004   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1005   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1006 
1007   if (flag & SOR_ZERO_INITIAL_GUESS) {
1008     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1009       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1010       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1011       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1012       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1013       i2     = bs;
1014       idiag += bs2;
1015       for (i=1; i<m; i++) {
1016         v     = aa + bs2*ai[i];
1017         vi    = aj + ai[i];
1018         nz    = diag[i] - ai[i];
1019 
1020         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1021 	/* copy all rows of x that are needed into contiguous space */
1022         workt = work;
1023         for (j=0; j<nz; j++) {
1024           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1025           workt += bs;
1026         }
1027         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1028        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1029         while (nz--) {
1030           idx  = N*(*vi++);
1031           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1032           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1033           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1034           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1035           v   += N2;
1036 	  } */
1037 
1038         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1039         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1040         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1041         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1042 
1043         idiag   += bs2;
1044         i2      += bs;
1045       }
1046       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1047       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1048     }
1049     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1050         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1051       i2    = 0;
1052       mdiag = a->idiag+bs2*a->mbs;
1053       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1054       for (i=0; i<m; i++) {
1055         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1056         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1057         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1058         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1059         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1060 
1061         mdiag  += bs2;
1062         i2     += bs;
1063       }
1064       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1065     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1066       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1067     }
1068     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1069       idiag   = a->idiag+bs2*a->mbs - bs2;
1070       i2      = bs*m - bs;
1071       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1072       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1073       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1074       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1075       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1076       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1077       idiag -= bs2;
1078       i2    -= bs;
1079       for (i=m-2; i>=0; i--) {
1080         v     = aa + bs2*(diag[i]+1);
1081         vi    = aj + diag[i] + 1;
1082         nz    = ai[i+1] - diag[i] - 1;
1083 
1084         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1085 	/* copy all rows of x that are needed into contiguous space */
1086         workt = work;
1087         for (j=0; j<nz; j++) {
1088           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1089           workt += bs;
1090         }
1091         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1092         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1093         while (nz--) {
1094           idx  = N*(*vi++);
1095           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1096           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1097           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1098           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1099           v   += N2;
1100 	  } */
1101 
1102         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1103         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1104         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1105         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1106         idiag   -= bs2;
1107         i2      -= bs;
1108       }
1109       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1110     }
1111   } else {
1112     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1113   }
1114   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1115   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 /*
1120     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1121 */
1122 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1123 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1124 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1125 #define matsetvaluesblocked4_ matsetvaluesblocked4
1126 #endif
1127 
1128 EXTERN_C_BEGIN
1129 #undef __FUNCT__
1130 #define __FUNCT__ "matsetvaluesblocked4_"
1131 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1132 {
1133   Mat               A = *AA;
1134   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1135   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1136   PetscInt          *ai=a->i,*ailen=a->ilen;
1137   PetscInt          *aj=a->j,stepval,lastcol = -1;
1138   const PetscScalar *value = v;
1139   MatScalar         *ap,*aa = a->a,*bap;
1140 
1141   PetscFunctionBegin;
1142   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1143   stepval = (n-1)*4;
1144   for (k=0; k<m; k++) { /* loop over added rows */
1145     row  = im[k];
1146     rp   = aj + ai[row];
1147     ap   = aa + 16*ai[row];
1148     nrow = ailen[row];
1149     low  = 0;
1150     high = nrow;
1151     for (l=0; l<n; l++) { /* loop over added columns */
1152       col = in[l];
1153       if (col <= lastcol) low = 0; else high = nrow;
1154       lastcol = col;
1155       value = v + k*(stepval+4 + l)*4;
1156       while (high-low > 7) {
1157         t = (low+high)/2;
1158         if (rp[t] > col) high = t;
1159         else             low  = t;
1160       }
1161       for (i=low; i<high; i++) {
1162         if (rp[i] > col) break;
1163         if (rp[i] == col) {
1164           bap  = ap +  16*i;
1165           for (ii=0; ii<4; ii++,value+=stepval) {
1166             for (jj=ii; jj<16; jj+=4) {
1167               bap[jj] += *value++;
1168             }
1169           }
1170           goto noinsert2;
1171         }
1172       }
1173       N = nrow++ - 1;
1174       high++; /* added new column index thus must search to one higher than before */
1175       /* shift up all the later entries in this row */
1176       for (ii=N; ii>=i; ii--) {
1177         rp[ii+1] = rp[ii];
1178         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1179       }
1180       if (N >= i) {
1181         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1182       }
1183       rp[i] = col;
1184       bap   = ap +  16*i;
1185       for (ii=0; ii<4; ii++,value+=stepval) {
1186         for (jj=ii; jj<16; jj+=4) {
1187           bap[jj] = *value++;
1188         }
1189       }
1190       noinsert2:;
1191       low = i;
1192     }
1193     ailen[row] = nrow;
1194   }
1195   PetscFunctionReturnVoid();
1196 }
1197 EXTERN_C_END
1198 
1199 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1200 #define matsetvalues4_ MATSETVALUES4
1201 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1202 #define matsetvalues4_ matsetvalues4
1203 #endif
1204 
1205 EXTERN_C_BEGIN
1206 #undef __FUNCT__
1207 #define __FUNCT__ "MatSetValues4_"
1208 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1209 {
1210   Mat         A = *AA;
1211   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1212   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1213   PetscInt    *ai=a->i,*ailen=a->ilen;
1214   PetscInt    *aj=a->j,brow,bcol;
1215   PetscInt    ridx,cidx,lastcol = -1;
1216   MatScalar   *ap,value,*aa=a->a,*bap;
1217 
1218   PetscFunctionBegin;
1219   for (k=0; k<m; k++) { /* loop over added rows */
1220     row  = im[k]; brow = row/4;
1221     rp   = aj + ai[brow];
1222     ap   = aa + 16*ai[brow];
1223     nrow = ailen[brow];
1224     low  = 0;
1225     high = nrow;
1226     for (l=0; l<n; l++) { /* loop over added columns */
1227       col = in[l]; bcol = col/4;
1228       ridx = row % 4; cidx = col % 4;
1229       value = v[l + k*n];
1230       if (col <= lastcol) low = 0; else high = nrow;
1231       lastcol = col;
1232       while (high-low > 7) {
1233         t = (low+high)/2;
1234         if (rp[t] > bcol) high = t;
1235         else              low  = t;
1236       }
1237       for (i=low; i<high; i++) {
1238         if (rp[i] > bcol) break;
1239         if (rp[i] == bcol) {
1240           bap  = ap +  16*i + 4*cidx + ridx;
1241           *bap += value;
1242           goto noinsert1;
1243         }
1244       }
1245       N = nrow++ - 1;
1246       high++; /* added new column thus must search to one higher than before */
1247       /* shift up all the later entries in this row */
1248       for (ii=N; ii>=i; ii--) {
1249         rp[ii+1] = rp[ii];
1250         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1251       }
1252       if (N>=i) {
1253         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1254       }
1255       rp[i]                    = bcol;
1256       ap[16*i + 4*cidx + ridx] = value;
1257       noinsert1:;
1258       low = i;
1259     }
1260     ailen[brow] = nrow;
1261   }
1262   PetscFunctionReturnVoid();
1263 }
1264 EXTERN_C_END
1265 
1266 /*
1267      Checks for missing diagonals
1268 */
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1271 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1272 {
1273   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1274   PetscErrorCode ierr;
1275   PetscInt       *diag,*jj = a->j,i;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1279   *missing = PETSC_FALSE;
1280   if (A->rmap->n > 0 && !jj) {
1281     *missing  = PETSC_TRUE;
1282     if (d) *d = 0;
1283     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1284   } else {
1285     diag     = a->diag;
1286     for (i=0; i<a->mbs; i++) {
1287       if (jj[diag[i]] != i) {
1288         *missing  = PETSC_TRUE;
1289         if (d) *d = i;
1290         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1291       }
1292     }
1293   }
1294   PetscFunctionReturn(0);
1295 }
1296 
1297 #undef __FUNCT__
1298 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1299 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1300 {
1301   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1302   PetscErrorCode ierr;
1303   PetscInt       i,j,m = a->mbs;
1304 
1305   PetscFunctionBegin;
1306   if (!a->diag) {
1307     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1308     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1309     a->free_diag = PETSC_TRUE;
1310   }
1311   for (i=0; i<m; i++) {
1312     a->diag[i] = a->i[i+1];
1313     for (j=a->i[i]; j<a->i[i+1]; j++) {
1314       if (a->j[j] == i) {
1315         a->diag[i] = j;
1316         break;
1317       }
1318     }
1319   }
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 
1324 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1328 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1333   PetscInt       *tia, *tja;
1334 
1335   PetscFunctionBegin;
1336   *nn = n;
1337   if (!ia) PetscFunctionReturn(0);
1338   if (symmetric) {
1339     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1340     nz   = tia[n];
1341   } else {
1342     tia = a->i; tja = a->j;
1343   }
1344 
1345   if (!blockcompressed && bs > 1) {
1346     (*nn) *= bs;
1347     /* malloc & create the natural set of indices */
1348     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1349     if (n) {
1350       (*ia)[0] = 0;
1351       for (j=1; j<bs; j++) {
1352         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1353       }
1354     }
1355 
1356     for (i=1; i<n; i++) {
1357       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1358       for (j=1; j<bs; j++) {
1359         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1360       }
1361     }
1362     if (n) {
1363       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1364     }
1365 
1366     if (ja) {
1367       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1368       cnt = 0;
1369       for (i=0; i<n; i++) {
1370         for (j=0; j<bs; j++) {
1371           for (k=tia[i]; k<tia[i+1]; k++) {
1372             for (l=0; l<bs; l++) {
1373               (*ja)[cnt++] = bs*tja[k] + l;
1374 	    }
1375           }
1376         }
1377       }
1378     }
1379 
1380     n     *= bs;
1381     nz *= bs*bs;
1382     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1383       ierr = PetscFree(tia);CHKERRQ(ierr);
1384       ierr = PetscFree(tja);CHKERRQ(ierr);
1385     }
1386   } else if (oshift == 1) {
1387     if (symmetric) {
1388       PetscInt nz = tia[A->rmap->n/bs];
1389       /*  add 1 to i and j indices */
1390       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1391       *ia = tia;
1392       if (ja) {
1393 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1394         *ja = tja;
1395       }
1396     } else {
1397       PetscInt nz = a->i[A->rmap->n/bs];
1398       /* malloc space and  add 1 to i and j indices */
1399       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1400       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1401       if (ja) {
1402 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1403 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1404       }
1405     }
1406   } else {
1407     *ia = tia;
1408     if (ja) *ja = tja;
1409   }
1410 
1411   PetscFunctionReturn(0);
1412 }
1413 
1414 #undef __FUNCT__
1415 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1416 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
1417 {
1418   PetscErrorCode ierr;
1419 
1420   PetscFunctionBegin;
1421   if (!ia) PetscFunctionReturn(0);
1422   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1423     ierr = PetscFree(*ia);CHKERRQ(ierr);
1424     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1431 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1432 {
1433   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1434   PetscErrorCode ierr;
1435 
1436   PetscFunctionBegin;
1437 #if defined(PETSC_USE_LOG)
1438   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1439 #endif
1440   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1441   if (a->row) {
1442     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1443   }
1444   if (a->col) {
1445     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1446   }
1447   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1448   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1449   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1450   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1451   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1452   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1453   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1455   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1456   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1457 
1458   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1459   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1460   ierr = PetscFree(a);CHKERRQ(ierr);
1461 
1462   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1463   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1465   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1466   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1467   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1468   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1476 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1477 {
1478   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1479   PetscErrorCode ierr;
1480 
1481   PetscFunctionBegin;
1482   switch (op) {
1483   case MAT_ROW_ORIENTED:
1484     a->roworiented    = flg;
1485     break;
1486   case MAT_KEEP_NONZERO_PATTERN:
1487     a->keepnonzeropattern = flg;
1488     break;
1489   case MAT_NEW_NONZERO_LOCATIONS:
1490     a->nonew          = (flg ? 0 : 1);
1491     break;
1492   case MAT_NEW_NONZERO_LOCATION_ERR:
1493     a->nonew          = (flg ? -1 : 0);
1494     break;
1495   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1496     a->nonew          = (flg ? -2 : 0);
1497     break;
1498   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1499     a->nounused       = (flg ? -1 : 0);
1500     break;
1501   case MAT_NEW_DIAGONALS:
1502   case MAT_IGNORE_OFF_PROC_ENTRIES:
1503   case MAT_USE_HASH_TABLE:
1504     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1505     break;
1506   case MAT_SYMMETRIC:
1507   case MAT_STRUCTURALLY_SYMMETRIC:
1508   case MAT_HERMITIAN:
1509   case MAT_SYMMETRY_ETERNAL:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   default:
1513     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1514   }
1515   PetscFunctionReturn(0);
1516 }
1517 
1518 #undef __FUNCT__
1519 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1520 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1521 {
1522   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1523   PetscErrorCode ierr;
1524   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1525   MatScalar      *aa,*aa_i;
1526   PetscScalar    *v_i;
1527 
1528   PetscFunctionBegin;
1529   bs  = A->rmap->bs;
1530   ai  = a->i;
1531   aj  = a->j;
1532   aa  = a->a;
1533   bs2 = a->bs2;
1534 
1535   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1536 
1537   bn  = row/bs;   /* Block number */
1538   bp  = row % bs; /* Block Position */
1539   M   = ai[bn+1] - ai[bn];
1540   *nz = bs*M;
1541 
1542   if (v) {
1543     *v = 0;
1544     if (*nz) {
1545       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1546       for (i=0; i<M; i++) { /* for each block in the block row */
1547         v_i  = *v + i*bs;
1548         aa_i = aa + bs2*(ai[bn] + i);
1549         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1550       }
1551     }
1552   }
1553 
1554   if (idx) {
1555     *idx = 0;
1556     if (*nz) {
1557       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1558       for (i=0; i<M; i++) { /* for each block in the block row */
1559         idx_i = *idx + i*bs;
1560         itmp  = bs*aj[ai[bn] + i];
1561         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1562       }
1563     }
1564   }
1565   PetscFunctionReturn(0);
1566 }
1567 
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1570 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1571 {
1572   PetscErrorCode ierr;
1573 
1574   PetscFunctionBegin;
1575   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1576   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1584 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1585 {
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1587   Mat            C;
1588   PetscErrorCode ierr;
1589   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1590   PetscInt       *rows,*cols,bs2=a->bs2;
1591   MatScalar      *array;
1592 
1593   PetscFunctionBegin;
1594   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1595   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1596     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1597     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1598 
1599     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1600     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1601     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1602     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1603     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1604     ierr = PetscFree(col);CHKERRQ(ierr);
1605   } else {
1606     C = *B;
1607   }
1608 
1609   array = a->a;
1610   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1611   for (i=0; i<mbs; i++) {
1612     cols[0] = i*bs;
1613     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1614     len = ai[i+1] - ai[i];
1615     for (j=0; j<len; j++) {
1616       rows[0] = (*aj++)*bs;
1617       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1618       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1619       array += bs2;
1620     }
1621   }
1622   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1623 
1624   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1625   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1626 
1627   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1628     *B = C;
1629   } else {
1630     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1631   }
1632   PetscFunctionReturn(0);
1633 }
1634 
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1637 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1638 {
1639   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1640   PetscErrorCode ierr;
1641   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1642   int            fd;
1643   PetscScalar    *aa;
1644   FILE           *file;
1645 
1646   PetscFunctionBegin;
1647   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1648   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1649   col_lens[0] = MAT_FILE_CLASSID;
1650 
1651   col_lens[1] = A->rmap->N;
1652   col_lens[2] = A->cmap->n;
1653   col_lens[3] = a->nz*bs2;
1654 
1655   /* store lengths of each row and write (including header) to file */
1656   count = 0;
1657   for (i=0; i<a->mbs; i++) {
1658     for (j=0; j<bs; j++) {
1659       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1660     }
1661   }
1662   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1663   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1664 
1665   /* store column indices (zero start index) */
1666   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1667   count = 0;
1668   for (i=0; i<a->mbs; i++) {
1669     for (j=0; j<bs; j++) {
1670       for (k=a->i[i]; k<a->i[i+1]; k++) {
1671         for (l=0; l<bs; l++) {
1672           jj[count++] = bs*a->j[k] + l;
1673         }
1674       }
1675     }
1676   }
1677   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1678   ierr = PetscFree(jj);CHKERRQ(ierr);
1679 
1680   /* store nonzero values */
1681   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1682   count = 0;
1683   for (i=0; i<a->mbs; i++) {
1684     for (j=0; j<bs; j++) {
1685       for (k=a->i[i]; k<a->i[i+1]; k++) {
1686         for (l=0; l<bs; l++) {
1687           aa[count++] = a->a[bs2*k + l*bs + j];
1688         }
1689       }
1690     }
1691   }
1692   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1693   ierr = PetscFree(aa);CHKERRQ(ierr);
1694 
1695   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1696   if (file) {
1697     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1698   }
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1704 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1705 {
1706   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1707   PetscErrorCode    ierr;
1708   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1709   PetscViewerFormat format;
1710 
1711   PetscFunctionBegin;
1712   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1713   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1714     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1715   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1716     Mat aij;
1717     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1718     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1719     ierr = MatDestroy(aij);CHKERRQ(ierr);
1720   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1721      PetscFunctionReturn(0);
1722   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1723     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1724     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1725     for (i=0; i<a->mbs; i++) {
1726       for (j=0; j<bs; j++) {
1727         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1728         for (k=a->i[i]; k<a->i[i+1]; k++) {
1729           for (l=0; l<bs; l++) {
1730 #if defined(PETSC_USE_COMPLEX)
1731             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1732               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1733                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1735               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1736                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1737             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1738               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             }
1740 #else
1741             if (a->a[bs2*k + l*bs + j] != 0.0) {
1742               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1743             }
1744 #endif
1745           }
1746         }
1747         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1748       }
1749     }
1750     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1751   } else {
1752     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1753     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1754     for (i=0; i<a->mbs; i++) {
1755       for (j=0; j<bs; j++) {
1756         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1757         for (k=a->i[i]; k<a->i[i+1]; k++) {
1758           for (l=0; l<bs; l++) {
1759 #if defined(PETSC_USE_COMPLEX)
1760             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1761               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1762                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1764               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1765                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1766             } else {
1767               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1768             }
1769 #else
1770             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1771 #endif
1772           }
1773         }
1774         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1775       }
1776     }
1777     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1778   }
1779   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 
1783 #undef __FUNCT__
1784 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1785 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1786 {
1787   Mat            A = (Mat) Aa;
1788   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1789   PetscErrorCode ierr;
1790   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1791   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1792   MatScalar      *aa;
1793   PetscViewer    viewer;
1794 
1795   PetscFunctionBegin;
1796 
1797   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1798   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1799 
1800   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1801 
1802   /* loop over matrix elements drawing boxes */
1803   color = PETSC_DRAW_BLUE;
1804   for (i=0,row=0; i<mbs; i++,row+=bs) {
1805     for (j=a->i[i]; j<a->i[i+1]; j++) {
1806       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1807       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1808       aa = a->a + j*bs2;
1809       for (k=0; k<bs; k++) {
1810         for (l=0; l<bs; l++) {
1811           if (PetscRealPart(*aa++) >=  0.) continue;
1812           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1813         }
1814       }
1815     }
1816   }
1817   color = PETSC_DRAW_CYAN;
1818   for (i=0,row=0; i<mbs; i++,row+=bs) {
1819     for (j=a->i[i]; j<a->i[i+1]; j++) {
1820       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1821       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1822       aa = a->a + j*bs2;
1823       for (k=0; k<bs; k++) {
1824         for (l=0; l<bs; l++) {
1825           if (PetscRealPart(*aa++) != 0.) continue;
1826           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1827         }
1828       }
1829     }
1830   }
1831 
1832   color = PETSC_DRAW_RED;
1833   for (i=0,row=0; i<mbs; i++,row+=bs) {
1834     for (j=a->i[i]; j<a->i[i+1]; j++) {
1835       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1836       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1837       aa = a->a + j*bs2;
1838       for (k=0; k<bs; k++) {
1839         for (l=0; l<bs; l++) {
1840           if (PetscRealPart(*aa++) <= 0.) continue;
1841           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1842         }
1843       }
1844     }
1845   }
1846   PetscFunctionReturn(0);
1847 }
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1851 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1852 {
1853   PetscErrorCode ierr;
1854   PetscReal      xl,yl,xr,yr,w,h;
1855   PetscDraw      draw;
1856   PetscBool      isnull;
1857 
1858   PetscFunctionBegin;
1859 
1860   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1861   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1862 
1863   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1864   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1865   xr += w;    yr += h;  xl = -w;     yl = -h;
1866   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1867   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1868   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1869   PetscFunctionReturn(0);
1870 }
1871 
1872 #undef __FUNCT__
1873 #define __FUNCT__ "MatView_SeqBAIJ"
1874 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1875 {
1876   PetscErrorCode ierr;
1877   PetscBool      iascii,isbinary,isdraw;
1878 
1879   PetscFunctionBegin;
1880   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1881   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1882   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1883   if (iascii){
1884     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1885   } else if (isbinary) {
1886     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1887   } else if (isdraw) {
1888     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1889   } else {
1890     Mat B;
1891     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1892     ierr = MatView(B,viewer);CHKERRQ(ierr);
1893     ierr = MatDestroy(B);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1901 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1902 {
1903   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1905   PetscInt    *ai = a->i,*ailen = a->ilen;
1906   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1907   MatScalar   *ap,*aa = a->a;
1908 
1909   PetscFunctionBegin;
1910   for (k=0; k<m; k++) { /* loop over rows */
1911     row  = im[k]; brow = row/bs;
1912     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1913     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1914     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1915     nrow = ailen[brow];
1916     for (l=0; l<n; l++) { /* loop over columns */
1917       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1918       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1919       col  = in[l] ;
1920       bcol = col/bs;
1921       cidx = col%bs;
1922       ridx = row%bs;
1923       high = nrow;
1924       low  = 0; /* assume unsorted */
1925       while (high-low > 5) {
1926         t = (low+high)/2;
1927         if (rp[t] > bcol) high = t;
1928         else             low  = t;
1929       }
1930       for (i=low; i<high; i++) {
1931         if (rp[i] > bcol) break;
1932         if (rp[i] == bcol) {
1933           *v++ = ap[bs2*i+bs*cidx+ridx];
1934           goto finished;
1935         }
1936       }
1937       *v++ = 0.0;
1938       finished:;
1939     }
1940   }
1941   PetscFunctionReturn(0);
1942 }
1943 
1944 #undef __FUNCT__
1945 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1946 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1947 {
1948   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1949   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1950   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1951   PetscErrorCode    ierr;
1952   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1953   PetscBool         roworiented=a->roworiented;
1954   const PetscScalar *value = v;
1955   MatScalar         *ap,*aa = a->a,*bap;
1956 
1957   PetscFunctionBegin;
1958   if (roworiented) {
1959     stepval = (n-1)*bs;
1960   } else {
1961     stepval = (m-1)*bs;
1962   }
1963   for (k=0; k<m; k++) { /* loop over added rows */
1964     row  = im[k];
1965     if (row < 0) continue;
1966 #if defined(PETSC_USE_DEBUG)
1967     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1968 #endif
1969     rp   = aj + ai[row];
1970     ap   = aa + bs2*ai[row];
1971     rmax = imax[row];
1972     nrow = ailen[row];
1973     low  = 0;
1974     high = nrow;
1975     for (l=0; l<n; l++) { /* loop over added columns */
1976       if (in[l] < 0) continue;
1977 #if defined(PETSC_USE_DEBUG)
1978       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1979 #endif
1980       col = in[l];
1981       if (roworiented) {
1982         value = v + (k*(stepval+bs) + l)*bs;
1983       } else {
1984         value = v + (l*(stepval+bs) + k)*bs;
1985       }
1986       if (col <= lastcol) low = 0; else high = nrow;
1987       lastcol = col;
1988       while (high-low > 7) {
1989         t = (low+high)/2;
1990         if (rp[t] > col) high = t;
1991         else             low  = t;
1992       }
1993       for (i=low; i<high; i++) {
1994         if (rp[i] > col) break;
1995         if (rp[i] == col) {
1996           bap  = ap +  bs2*i;
1997           if (roworiented) {
1998             if (is == ADD_VALUES) {
1999               for (ii=0; ii<bs; ii++,value+=stepval) {
2000                 for (jj=ii; jj<bs2; jj+=bs) {
2001                   bap[jj] += *value++;
2002                 }
2003               }
2004             } else {
2005               for (ii=0; ii<bs; ii++,value+=stepval) {
2006                 for (jj=ii; jj<bs2; jj+=bs) {
2007                   bap[jj] = *value++;
2008                 }
2009               }
2010             }
2011           } else {
2012             if (is == ADD_VALUES) {
2013               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2014                 for (jj=0; jj<bs; jj++) {
2015                   bap[jj] += value[jj];
2016                 }
2017                 bap += bs;
2018               }
2019             } else {
2020               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2021                 for (jj=0; jj<bs; jj++) {
2022                   bap[jj]  = value[jj];
2023                 }
2024                 bap += bs;
2025               }
2026             }
2027           }
2028           goto noinsert2;
2029         }
2030       }
2031       if (nonew == 1) goto noinsert2;
2032       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2033       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2034       N = nrow++ - 1; high++;
2035       /* shift up all the later entries in this row */
2036       for (ii=N; ii>=i; ii--) {
2037         rp[ii+1] = rp[ii];
2038         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2039       }
2040       if (N >= i) {
2041         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2042       }
2043       rp[i] = col;
2044       bap   = ap +  bs2*i;
2045       if (roworiented) {
2046         for (ii=0; ii<bs; ii++,value+=stepval) {
2047           for (jj=ii; jj<bs2; jj+=bs) {
2048             bap[jj] = *value++;
2049           }
2050         }
2051       } else {
2052         for (ii=0; ii<bs; ii++,value+=stepval) {
2053           for (jj=0; jj<bs; jj++) {
2054             *bap++  = *value++;
2055           }
2056         }
2057       }
2058       noinsert2:;
2059       low = i;
2060     }
2061     ailen[row] = nrow;
2062   }
2063   PetscFunctionReturn(0);
2064 }
2065 
2066 #undef __FUNCT__
2067 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2068 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2069 {
2070   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2071   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2072   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2073   PetscErrorCode ierr;
2074   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2075   MatScalar      *aa = a->a,*ap;
2076   PetscReal      ratio=0.6;
2077 
2078   PetscFunctionBegin;
2079   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2080 
2081   if (m) rmax = ailen[0];
2082   for (i=1; i<mbs; i++) {
2083     /* move each row back by the amount of empty slots (fshift) before it*/
2084     fshift += imax[i-1] - ailen[i-1];
2085     rmax   = PetscMax(rmax,ailen[i]);
2086     if (fshift) {
2087       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2088       N = ailen[i];
2089       for (j=0; j<N; j++) {
2090         ip[j-fshift] = ip[j];
2091         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2092       }
2093     }
2094     ai[i] = ai[i-1] + ailen[i-1];
2095   }
2096   if (mbs) {
2097     fshift += imax[mbs-1] - ailen[mbs-1];
2098     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2099   }
2100   /* reset ilen and imax for each row */
2101   for (i=0; i<mbs; i++) {
2102     ailen[i] = imax[i] = ai[i+1] - ai[i];
2103   }
2104   a->nz = ai[mbs];
2105 
2106   /* diagonals may have moved, so kill the diagonal pointers */
2107   a->idiagvalid = PETSC_FALSE;
2108   if (fshift && a->diag) {
2109     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2110     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2111     a->diag = 0;
2112   }
2113   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2114   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2115   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2116   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2117   A->info.mallocs     += a->reallocs;
2118   a->reallocs          = 0;
2119   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2120 
2121   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
2122   if (a->compressedrow.use){
2123     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2124   }
2125 
2126   A->same_nonzero = PETSC_TRUE;
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 /*
2131    This function returns an array of flags which indicate the locations of contiguous
2132    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2133    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2134    Assume: sizes should be long enough to hold all the values.
2135 */
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2138 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2139 {
2140   PetscInt   i,j,k,row;
2141   PetscBool  flg;
2142 
2143   PetscFunctionBegin;
2144   for (i=0,j=0; i<n; j++) {
2145     row = idx[i];
2146     if (row%bs!=0) { /* Not the begining of a block */
2147       sizes[j] = 1;
2148       i++;
2149     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2150       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2151       i++;
2152     } else { /* Begining of the block, so check if the complete block exists */
2153       flg = PETSC_TRUE;
2154       for (k=1; k<bs; k++) {
2155         if (row+k != idx[i+k]) { /* break in the block */
2156           flg = PETSC_FALSE;
2157           break;
2158         }
2159       }
2160       if (flg) { /* No break in the bs */
2161         sizes[j] = bs;
2162         i+= bs;
2163       } else {
2164         sizes[j] = 1;
2165         i++;
2166       }
2167     }
2168   }
2169   *bs_max = j;
2170   PetscFunctionReturn(0);
2171 }
2172 
2173 #undef __FUNCT__
2174 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2175 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2176 {
2177   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2178   PetscErrorCode ierr;
2179   PetscInt       i,j,k,count,*rows;
2180   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2181   PetscScalar    zero = 0.0;
2182   MatScalar      *aa;
2183 
2184   PetscFunctionBegin;
2185   /* Make a copy of the IS and  sort it */
2186   /* allocate memory for rows,sizes */
2187   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2188 
2189   /* copy IS values to rows, and sort them */
2190   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2191   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2192   if (baij->keepnonzeropattern) {
2193     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2194     bs_max = is_n;
2195     A->same_nonzero = PETSC_TRUE;
2196   } else {
2197     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2198     A->same_nonzero = PETSC_FALSE;
2199   }
2200 
2201   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2202     row   = rows[j];
2203     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2204     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2205     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2206     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2207       if (diag != 0.0) {
2208         if (baij->ilen[row/bs] > 0) {
2209           baij->ilen[row/bs]       = 1;
2210           baij->j[baij->i[row/bs]] = row/bs;
2211           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2212         }
2213         /* Now insert all the diagonal values for this bs */
2214         for (k=0; k<bs; k++) {
2215           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2216         }
2217       } else { /* (diag == 0.0) */
2218         baij->ilen[row/bs] = 0;
2219       } /* end (diag == 0.0) */
2220     } else { /* (sizes[i] != bs) */
2221 #if defined (PETSC_USE_DEBUG)
2222       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2223 #endif
2224       for (k=0; k<count; k++) {
2225         aa[0] =  zero;
2226         aa    += bs;
2227       }
2228       if (diag != 0.0) {
2229         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2230       }
2231     }
2232   }
2233 
2234   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2235   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2236   PetscFunctionReturn(0);
2237 }
2238 
2239 #undef __FUNCT__
2240 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2241 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2242 {
2243   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2244   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2245   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2246   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2247   PetscErrorCode ierr;
2248   PetscInt       ridx,cidx,bs2=a->bs2;
2249   PetscBool      roworiented=a->roworiented;
2250   MatScalar      *ap,value,*aa=a->a,*bap;
2251 
2252   PetscFunctionBegin;
2253   if (v) PetscValidScalarPointer(v,6);
2254   for (k=0; k<m; k++) { /* loop over added rows */
2255     row  = im[k];
2256     brow = row/bs;
2257     if (row < 0) continue;
2258 #if defined(PETSC_USE_DEBUG)
2259     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2260 #endif
2261     rp   = aj + ai[brow];
2262     ap   = aa + bs2*ai[brow];
2263     rmax = imax[brow];
2264     nrow = ailen[brow];
2265     low  = 0;
2266     high = nrow;
2267     for (l=0; l<n; l++) { /* loop over added columns */
2268       if (in[l] < 0) continue;
2269 #if defined(PETSC_USE_DEBUG)
2270       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2271 #endif
2272       col = in[l]; bcol = col/bs;
2273       ridx = row % bs; cidx = col % bs;
2274       if (roworiented) {
2275         value = v[l + k*n];
2276       } else {
2277         value = v[k + l*m];
2278       }
2279       if (col <= lastcol) low = 0; else high = nrow;
2280       lastcol = col;
2281       while (high-low > 7) {
2282         t = (low+high)/2;
2283         if (rp[t] > bcol) high = t;
2284         else              low  = t;
2285       }
2286       for (i=low; i<high; i++) {
2287         if (rp[i] > bcol) break;
2288         if (rp[i] == bcol) {
2289           bap  = ap +  bs2*i + bs*cidx + ridx;
2290           if (is == ADD_VALUES) *bap += value;
2291           else                  *bap  = value;
2292           goto noinsert1;
2293         }
2294       }
2295       if (nonew == 1) goto noinsert1;
2296       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2297       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2298       N = nrow++ - 1; high++;
2299       /* shift up all the later entries in this row */
2300       for (ii=N; ii>=i; ii--) {
2301         rp[ii+1] = rp[ii];
2302         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2303       }
2304       if (N>=i) {
2305         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2306       }
2307       rp[i]                      = bcol;
2308       ap[bs2*i + bs*cidx + ridx] = value;
2309       a->nz++;
2310       noinsert1:;
2311       low = i;
2312     }
2313     ailen[brow] = nrow;
2314   }
2315   A->same_nonzero = PETSC_FALSE;
2316   PetscFunctionReturn(0);
2317 }
2318 
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2321 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2322 {
2323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2324   Mat            outA;
2325   PetscErrorCode ierr;
2326   PetscBool      row_identity,col_identity;
2327 
2328   PetscFunctionBegin;
2329   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2330   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2331   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2332   if (!row_identity || !col_identity) {
2333     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2334   }
2335 
2336   outA            = inA;
2337   inA->factortype = MAT_FACTOR_LU;
2338 
2339   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2340 
2341   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2342   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2343   a->row = row;
2344   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2345   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2346   a->col = col;
2347 
2348   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2349   if (a->icol) {
2350     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2351   }
2352   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2353   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2354 
2355   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2356   if (!a->solve_work) {
2357     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2358     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2359   }
2360   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2361 
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 EXTERN_C_BEGIN
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2368 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2369 {
2370   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2371   PetscInt    i,nz,mbs;
2372 
2373   PetscFunctionBegin;
2374   nz  = baij->maxnz;
2375   mbs = baij->mbs;
2376   for (i=0; i<nz; i++) {
2377     baij->j[i] = indices[i];
2378   }
2379   baij->nz = nz;
2380   for (i=0; i<mbs; i++) {
2381     baij->ilen[i] = baij->imax[i];
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 EXTERN_C_END
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2389 /*@
2390     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2391        in the matrix.
2392 
2393   Input Parameters:
2394 +  mat - the SeqBAIJ matrix
2395 -  indices - the column indices
2396 
2397   Level: advanced
2398 
2399   Notes:
2400     This can be called if you have precomputed the nonzero structure of the
2401   matrix and want to provide it to the matrix object to improve the performance
2402   of the MatSetValues() operation.
2403 
2404     You MUST have set the correct numbers of nonzeros per row in the call to
2405   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2406 
2407     MUST be called before any calls to MatSetValues();
2408 
2409 @*/
2410 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2411 {
2412   PetscErrorCode ierr;
2413 
2414   PetscFunctionBegin;
2415   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2416   PetscValidPointer(indices,2);
2417   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2423 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2424 {
2425   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2426   PetscErrorCode ierr;
2427   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2428   PetscReal      atmp;
2429   PetscScalar    *x,zero = 0.0;
2430   MatScalar      *aa;
2431   PetscInt       ncols,brow,krow,kcol;
2432 
2433   PetscFunctionBegin;
2434   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2435   bs   = A->rmap->bs;
2436   aa   = a->a;
2437   ai   = a->i;
2438   aj   = a->j;
2439   mbs  = a->mbs;
2440 
2441   ierr = VecSet(v,zero);CHKERRQ(ierr);
2442   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2443   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2444   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2445   for (i=0; i<mbs; i++) {
2446     ncols = ai[1] - ai[0]; ai++;
2447     brow  = bs*i;
2448     for (j=0; j<ncols; j++){
2449       for (kcol=0; kcol<bs; kcol++){
2450         for (krow=0; krow<bs; krow++){
2451           atmp = PetscAbsScalar(*aa);aa++;
2452           row = brow + krow;    /* row index */
2453           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2454           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2455         }
2456       }
2457       aj++;
2458     }
2459   }
2460   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatCopy_SeqBAIJ"
2466 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2467 {
2468   PetscErrorCode ierr;
2469 
2470   PetscFunctionBegin;
2471   /* If the two matrices have the same copy implementation, use fast copy. */
2472   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2473     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2474     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2475     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2476 
2477     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2478     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2479     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2480   } else {
2481     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2482   }
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2488 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2489 {
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2499 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2500 {
2501   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2502   PetscFunctionBegin;
2503   *array = a->a;
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2509 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2510 {
2511   PetscFunctionBegin;
2512   PetscFunctionReturn(0);
2513 }
2514 
2515 #undef __FUNCT__
2516 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2517 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2518 {
2519   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2520   PetscErrorCode ierr;
2521   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2522   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2523 
2524   PetscFunctionBegin;
2525   if (str == SAME_NONZERO_PATTERN) {
2526     PetscScalar alpha = a;
2527     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2528   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2529     if (y->xtoy && y->XtoY != X) {
2530       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2531       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2532     }
2533     if (!y->xtoy) { /* get xtoy */
2534       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2535       y->XtoY = X;
2536       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2537     }
2538     bs2 = bs*bs;
2539     for (i=0; i<x->nz; i++) {
2540       j = 0;
2541       while (j < bs2){
2542         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2543         j++;
2544       }
2545     }
2546     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2547   } else {
2548     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2549   }
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2555 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2556 {
2557   PetscInt rbs,cbs;
2558   PetscErrorCode ierr;
2559 
2560   PetscFunctionBegin;
2561   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2562   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2563   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2564   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2565   PetscFunctionReturn(0);
2566 }
2567 
2568 #undef __FUNCT__
2569 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2570 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2571 {
2572   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2573   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2574   MatScalar      *aa = a->a;
2575 
2576   PetscFunctionBegin;
2577   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2583 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2584 {
2585   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2586   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2587   MatScalar      *aa = a->a;
2588 
2589   PetscFunctionBegin;
2590   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2598 /*
2599     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2600 */
2601 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2602 {
2603   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2604   PetscErrorCode ierr;
2605   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2606   PetscInt       nz = a->i[m],row,*jj,mr,col;
2607 
2608   PetscFunctionBegin;
2609   *nn = n;
2610   if (!ia) PetscFunctionReturn(0);
2611   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2612   else {
2613     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2614     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2615     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2616     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2617     jj = a->j;
2618     for (i=0; i<nz; i++) {
2619       collengths[jj[i]]++;
2620     }
2621     cia[0] = oshift;
2622     for (i=0; i<n; i++) {
2623       cia[i+1] = cia[i] + collengths[i];
2624     }
2625     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2626     jj   = a->j;
2627     for (row=0; row<m; row++) {
2628       mr = a->i[row+1] - a->i[row];
2629       for (i=0; i<mr; i++) {
2630         col = *jj++;
2631         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2632       }
2633     }
2634     ierr = PetscFree(collengths);CHKERRQ(ierr);
2635     *ia = cia; *ja = cja;
2636   }
2637   PetscFunctionReturn(0);
2638 }
2639 
2640 #undef __FUNCT__
2641 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2642 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscBool  *done)
2643 {
2644   PetscErrorCode ierr;
2645 
2646   PetscFunctionBegin;
2647   if (!ia) PetscFunctionReturn(0);
2648   ierr = PetscFree(*ia);CHKERRQ(ierr);
2649   ierr = PetscFree(*ja);CHKERRQ(ierr);
2650   PetscFunctionReturn(0);
2651 }
2652 
2653 #undef __FUNCT__
2654 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2655 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2656 {
2657   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2658   PetscErrorCode ierr;
2659   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2660   PetscScalar    dx,*y,*xx,*w3_array;
2661   PetscScalar    *vscale_array;
2662   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2663   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2664   void           *fctx = coloring->fctx;
2665   PetscBool      flg = PETSC_FALSE;
2666   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2667   Vec            x1_tmp;
2668 
2669   PetscFunctionBegin;
2670   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2671   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2672   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2673   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2674 
2675   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2676   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2677   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2678   if (flg) {
2679     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2680   } else {
2681     PetscBool  assembled;
2682     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2683     if (assembled) {
2684       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2685     }
2686   }
2687 
2688   x1_tmp = x1;
2689   if (!coloring->vscale){
2690     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2691   }
2692 
2693   /*
2694     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2695     coloring->F for the coarser grids from the finest
2696   */
2697   if (coloring->F) {
2698     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2699     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2700     if (m1 != m2) {
2701       coloring->F = 0;
2702       }
2703     }
2704 
2705   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2706     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2707   }
2708   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2709 
2710   /* Set w1 = F(x1) */
2711   if (coloring->F) {
2712     w1          = coloring->F; /* use already computed value of function */
2713     coloring->F = 0;
2714   } else {
2715     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2716     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2717     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2718   }
2719 
2720   if (!coloring->w3) {
2721     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2722     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2723   }
2724   w3 = coloring->w3;
2725 
2726     CHKMEMQ;
2727     /* Compute all the local scale factors, including ghost points */
2728   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2729   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2730   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2731   if (ctype == IS_COLORING_GHOSTED){
2732     col_start = 0; col_end = N;
2733   } else if (ctype == IS_COLORING_GLOBAL){
2734     xx = xx - start;
2735     vscale_array = vscale_array - start;
2736     col_start = start; col_end = N + start;
2737   }    CHKMEMQ;
2738   for (col=col_start; col<col_end; col++){
2739     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2740     if (coloring->htype[0] == 'w') {
2741       dx = 1.0 + unorm;
2742     } else {
2743       dx  = xx[col];
2744     }
2745     if (dx == 0.0) dx = 1.0;
2746 #if !defined(PETSC_USE_COMPLEX)
2747     if (dx < umin && dx >= 0.0)      dx = umin;
2748     else if (dx < 0.0 && dx > -umin) dx = -umin;
2749 #else
2750     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2751     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2752 #endif
2753     dx               *= epsilon;
2754     vscale_array[col] = 1.0/dx;
2755   }     CHKMEMQ;
2756   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2757   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2758   if (ctype == IS_COLORING_GLOBAL){
2759     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2760     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2761   }
2762   CHKMEMQ;
2763   if (coloring->vscaleforrow) {
2764     vscaleforrow = coloring->vscaleforrow;
2765   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2766 
2767   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2768   /*
2769     Loop over each color
2770   */
2771   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2772   for (k=0; k<coloring->ncolors; k++) {
2773     coloring->currentcolor = k;
2774     for (i=0; i<bs; i++) {
2775       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2776       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2777       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2778       /*
2779 	Loop over each column associated with color
2780 	adding the perturbation to the vector w3.
2781       */
2782       for (l=0; l<coloring->ncolumns[k]; l++) {
2783 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2784 	if (coloring->htype[0] == 'w') {
2785 	  dx = 1.0 + unorm;
2786 	} else {
2787 	  dx  = xx[col];
2788 	}
2789 	if (dx == 0.0) dx = 1.0;
2790 #if !defined(PETSC_USE_COMPLEX)
2791 	if (dx < umin && dx >= 0.0)      dx = umin;
2792 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2793 #else
2794 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2795 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2796 #endif
2797 	dx            *= epsilon;
2798 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2799 	w3_array[col] += dx;
2800       }
2801       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2802       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2803 
2804       /*
2805 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2806 	w2 = F(x1 + dx) - F(x1)
2807       */
2808       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2809       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2810       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2811       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2812 
2813       /*
2814 	Loop over rows of vector, putting results into Jacobian matrix
2815       */
2816       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2817       for (l=0; l<coloring->nrows[k]; l++) {
2818 	row    = bs*coloring->rows[k][l];             /* local row index */
2819 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2820         for (j=0; j<bs; j++) {
2821   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2822           srows[j]  = row + start + j;
2823         }
2824 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2825       }
2826       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2827     }
2828   } /* endof for each color */
2829   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2830   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2831   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2832   ierr = PetscFree(srows);CHKERRQ(ierr);
2833 
2834   coloring->currentcolor = -1;
2835   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2836   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2837   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2838   PetscFunctionReturn(0);
2839 }
2840 
2841 /* -------------------------------------------------------------------*/
2842 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2843        MatGetRow_SeqBAIJ,
2844        MatRestoreRow_SeqBAIJ,
2845        MatMult_SeqBAIJ_N,
2846 /* 4*/ MatMultAdd_SeqBAIJ_N,
2847        MatMultTranspose_SeqBAIJ,
2848        MatMultTransposeAdd_SeqBAIJ,
2849        0,
2850        0,
2851        0,
2852 /*10*/ 0,
2853        MatLUFactor_SeqBAIJ,
2854        0,
2855        0,
2856        MatTranspose_SeqBAIJ,
2857 /*15*/ MatGetInfo_SeqBAIJ,
2858        MatEqual_SeqBAIJ,
2859        MatGetDiagonal_SeqBAIJ,
2860        MatDiagonalScale_SeqBAIJ,
2861        MatNorm_SeqBAIJ,
2862 /*20*/ 0,
2863        MatAssemblyEnd_SeqBAIJ,
2864        MatSetOption_SeqBAIJ,
2865        MatZeroEntries_SeqBAIJ,
2866 /*24*/ MatZeroRows_SeqBAIJ,
2867        0,
2868        0,
2869        0,
2870        0,
2871 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2872        0,
2873        0,
2874        MatGetArray_SeqBAIJ,
2875        MatRestoreArray_SeqBAIJ,
2876 /*34*/ MatDuplicate_SeqBAIJ,
2877        0,
2878        0,
2879        MatILUFactor_SeqBAIJ,
2880        0,
2881 /*39*/ MatAXPY_SeqBAIJ,
2882        MatGetSubMatrices_SeqBAIJ,
2883        MatIncreaseOverlap_SeqBAIJ,
2884        MatGetValues_SeqBAIJ,
2885        MatCopy_SeqBAIJ,
2886 /*44*/ 0,
2887        MatScale_SeqBAIJ,
2888        0,
2889        0,
2890        0,
2891 /*49*/ MatSetBlockSize_SeqBAIJ,
2892        MatGetRowIJ_SeqBAIJ,
2893        MatRestoreRowIJ_SeqBAIJ,
2894        MatGetColumnIJ_SeqBAIJ,
2895        MatRestoreColumnIJ_SeqBAIJ,
2896 /*54*/ MatFDColoringCreate_SeqAIJ,
2897        0,
2898        0,
2899        0,
2900        MatSetValuesBlocked_SeqBAIJ,
2901 /*59*/ MatGetSubMatrix_SeqBAIJ,
2902        MatDestroy_SeqBAIJ,
2903        MatView_SeqBAIJ,
2904        0,
2905        0,
2906 /*64*/ 0,
2907        0,
2908        0,
2909        0,
2910        0,
2911 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2912        0,
2913        MatConvert_Basic,
2914        0,
2915        0,
2916 /*74*/ 0,
2917        MatFDColoringApply_BAIJ,
2918        0,
2919        0,
2920        0,
2921 /*79*/ 0,
2922        0,
2923        0,
2924        0,
2925        MatLoad_SeqBAIJ,
2926 /*84*/ 0,
2927        0,
2928        0,
2929        0,
2930        0,
2931 /*89*/ 0,
2932        0,
2933        0,
2934        0,
2935        0,
2936 /*94*/ 0,
2937        0,
2938        0,
2939        0,
2940        0,
2941 /*99*/0,
2942        0,
2943        0,
2944        0,
2945        0,
2946 /*104*/0,
2947        MatRealPart_SeqBAIJ,
2948        MatImaginaryPart_SeqBAIJ,
2949        0,
2950        0,
2951 /*109*/0,
2952        0,
2953        0,
2954        0,
2955        MatMissingDiagonal_SeqBAIJ,
2956 /*114*/0,
2957        0,
2958        0,
2959        0,
2960        0,
2961 /*119*/0,
2962        0,
2963        MatMultHermitianTranspose_SeqBAIJ,
2964        MatMultHermitianTransposeAdd_SeqBAIJ,
2965        0,
2966 };
2967 
2968 EXTERN_C_BEGIN
2969 #undef __FUNCT__
2970 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2971 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2972 {
2973   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2974   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2975   PetscErrorCode ierr;
2976 
2977   PetscFunctionBegin;
2978   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2979 
2980   /* allocate space for values if not already there */
2981   if (!aij->saved_values) {
2982     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2983     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2984   }
2985 
2986   /* copy values over */
2987   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2988   PetscFunctionReturn(0);
2989 }
2990 EXTERN_C_END
2991 
2992 EXTERN_C_BEGIN
2993 #undef __FUNCT__
2994 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2995 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2996 {
2997   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2998   PetscErrorCode ierr;
2999   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3000 
3001   PetscFunctionBegin;
3002   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3003   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3004 
3005   /* copy values over */
3006   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3007   PetscFunctionReturn(0);
3008 }
3009 EXTERN_C_END
3010 
3011 EXTERN_C_BEGIN
3012 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3013 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3014 EXTERN_C_END
3015 
3016 EXTERN_C_BEGIN
3017 #undef __FUNCT__
3018 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3019 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3020 {
3021   Mat_SeqBAIJ    *b;
3022   PetscErrorCode ierr;
3023   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3024   PetscBool      flg,skipallocation = PETSC_FALSE;
3025 
3026   PetscFunctionBegin;
3027 
3028   if (nz == MAT_SKIP_ALLOCATION) {
3029     skipallocation = PETSC_TRUE;
3030     nz             = 0;
3031   }
3032 
3033   if (bs < 0) {
3034     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3035       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3036     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3037     bs   = PetscAbs(bs);
3038   }
3039   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3040   bs   = newbs;
3041 
3042   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3043   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3044   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3045   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3046 
3047   B->preallocated = PETSC_TRUE;
3048 
3049   mbs  = B->rmap->n/bs;
3050   nbs  = B->cmap->n/bs;
3051   bs2  = bs*bs;
3052 
3053   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3054 
3055   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3056   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3057   if (nnz) {
3058     for (i=0; i<mbs; i++) {
3059       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3060       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3061     }
3062   }
3063 
3064   b       = (Mat_SeqBAIJ*)B->data;
3065   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3066     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3067   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3068 
3069   if (!flg) {
3070     switch (bs) {
3071     case 1:
3072       B->ops->mult            = MatMult_SeqBAIJ_1;
3073       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3074       B->ops->sor             = MatSOR_SeqBAIJ_1;
3075       break;
3076     case 2:
3077       B->ops->mult            = MatMult_SeqBAIJ_2;
3078       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3079       B->ops->sor             = MatSOR_SeqBAIJ_2;
3080       break;
3081     case 3:
3082       B->ops->mult            = MatMult_SeqBAIJ_3;
3083       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3084       B->ops->sor             = MatSOR_SeqBAIJ_3;
3085       break;
3086     case 4:
3087       B->ops->mult            = MatMult_SeqBAIJ_4;
3088       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3089       B->ops->sor             = MatSOR_SeqBAIJ_4;
3090       break;
3091     case 5:
3092       B->ops->mult            = MatMult_SeqBAIJ_5;
3093       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3094       B->ops->sor             = MatSOR_SeqBAIJ_5;
3095       break;
3096     case 6:
3097       B->ops->mult            = MatMult_SeqBAIJ_6;
3098       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3099       B->ops->sor             = MatSOR_SeqBAIJ_6;
3100       break;
3101     case 7:
3102       B->ops->mult            = MatMult_SeqBAIJ_7;
3103       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3104       B->ops->sor             = MatSOR_SeqBAIJ_7;
3105       break;
3106     case 15:
3107       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3108       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3109       B->ops->sor             = MatSOR_SeqBAIJ_N;
3110       break;
3111     default:
3112       B->ops->mult            = MatMult_SeqBAIJ_N;
3113       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3114       B->ops->sor             = MatSOR_SeqBAIJ_N;
3115       break;
3116     }
3117   }
3118   B->rmap->bs  = bs;
3119   b->mbs       = mbs;
3120   b->nbs       = nbs;
3121   if (!skipallocation) {
3122     if (!b->imax) {
3123       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3124       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3125       b->free_imax_ilen = PETSC_TRUE;
3126     }
3127     /* b->ilen will count nonzeros in each block row so far. */
3128     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3129     if (!nnz) {
3130       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3131       else if (nz < 0) nz = 1;
3132       for (i=0; i<mbs; i++) b->imax[i] = nz;
3133       nz = nz*mbs;
3134     } else {
3135       nz = 0;
3136       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3137     }
3138 
3139     /* allocate the matrix space */
3140     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3141     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3142     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3143     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3144     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3145     b->singlemalloc = PETSC_TRUE;
3146     b->i[0] = 0;
3147     for (i=1; i<mbs+1; i++) {
3148       b->i[i] = b->i[i-1] + b->imax[i-1];
3149     }
3150     b->free_a     = PETSC_TRUE;
3151     b->free_ij    = PETSC_TRUE;
3152   } else {
3153     b->free_a     = PETSC_FALSE;
3154     b->free_ij    = PETSC_FALSE;
3155   }
3156 
3157   B->rmap->bs          = bs;
3158   b->bs2              = bs2;
3159   b->mbs              = mbs;
3160   b->nz               = 0;
3161   b->maxnz            = nz;
3162   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3163   PetscFunctionReturn(0);
3164 }
3165 EXTERN_C_END
3166 
3167 EXTERN_C_BEGIN
3168 #undef __FUNCT__
3169 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3170 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3171 {
3172   PetscInt       i,m,nz,nz_max=0,*nnz;
3173   PetscScalar    *values=0;
3174   PetscErrorCode ierr;
3175 
3176   PetscFunctionBegin;
3177   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3178   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3179   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3180   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3181   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3182   m = B->rmap->n/bs;
3183 
3184   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3185   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3186   for(i=0; i<m; i++) {
3187     nz = ii[i+1]- ii[i];
3188     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3189     nz_max = PetscMax(nz_max, nz);
3190     nnz[i] = nz;
3191   }
3192   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3193   ierr = PetscFree(nnz);CHKERRQ(ierr);
3194 
3195   values = (PetscScalar*)V;
3196   if (!values) {
3197     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3198     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3199   }
3200   for (i=0; i<m; i++) {
3201     PetscInt          ncols  = ii[i+1] - ii[i];
3202     const PetscInt    *icols = jj + ii[i];
3203     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3204     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3205   }
3206   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3207   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3208   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3209 
3210   PetscFunctionReturn(0);
3211 }
3212 EXTERN_C_END
3213 
3214 
3215 EXTERN_C_BEGIN
3216 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3217 #if defined(PETSC_HAVE_MUMPS)
3218 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3219 #endif
3220 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3221 EXTERN_C_END
3222 
3223 /*MC
3224    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3225    block sparse compressed row format.
3226 
3227    Options Database Keys:
3228 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3229 
3230   Level: beginner
3231 
3232 .seealso: MatCreateSeqBAIJ()
3233 M*/
3234 
3235 
3236 EXTERN_C_BEGIN
3237 #undef __FUNCT__
3238 #define __FUNCT__ "MatCreate_SeqBAIJ"
3239 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3240 {
3241   PetscErrorCode ierr;
3242   PetscMPIInt    size;
3243   Mat_SeqBAIJ    *b;
3244 
3245   PetscFunctionBegin;
3246   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3247   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3248 
3249   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3250   B->data = (void*)b;
3251   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3252   B->mapping               = 0;
3253   b->row                   = 0;
3254   b->col                   = 0;
3255   b->icol                  = 0;
3256   b->reallocs              = 0;
3257   b->saved_values          = 0;
3258 
3259   b->roworiented           = PETSC_TRUE;
3260   b->nonew                 = 0;
3261   b->diag                  = 0;
3262   b->solve_work            = 0;
3263   b->mult_work             = 0;
3264   B->spptr                 = 0;
3265   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3266   b->keepnonzeropattern    = PETSC_FALSE;
3267   b->xtoy                  = 0;
3268   b->XtoY                  = 0;
3269   b->compressedrow.use     = PETSC_FALSE;
3270   b->compressedrow.nrows   = 0;
3271   b->compressedrow.i       = PETSC_NULL;
3272   b->compressedrow.rindex  = PETSC_NULL;
3273   b->compressedrow.checked = PETSC_FALSE;
3274   B->same_nonzero          = PETSC_FALSE;
3275 
3276   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3277                                      "MatGetFactorAvailable_seqbaij_petsc",
3278                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3279   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3280                                      "MatGetFactor_seqbaij_petsc",
3281                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3282 #if defined(PETSC_HAVE_MUMPS)
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3284 #endif
3285   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3286                                      "MatInvertBlockDiagonal_SeqBAIJ",
3287                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3288   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3289                                      "MatStoreValues_SeqBAIJ",
3290                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3291   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3292                                      "MatRetrieveValues_SeqBAIJ",
3293                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3294   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3295                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3296                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3297   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3298                                      "MatConvert_SeqBAIJ_SeqAIJ",
3299                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3300   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3301                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3302                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3303   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3304                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3305                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3306   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3307                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3308                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3309   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3310   PetscFunctionReturn(0);
3311 }
3312 EXTERN_C_END
3313 
3314 #undef __FUNCT__
3315 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3316 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3317 {
3318   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3319   PetscErrorCode ierr;
3320   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3321 
3322   PetscFunctionBegin;
3323   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3324 
3325   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3326     c->imax = a->imax;
3327     c->ilen = a->ilen;
3328     c->free_imax_ilen = PETSC_FALSE;
3329   } else {
3330     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3331     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3332     for (i=0; i<mbs; i++) {
3333       c->imax[i] = a->imax[i];
3334       c->ilen[i] = a->ilen[i];
3335     }
3336     c->free_imax_ilen = PETSC_TRUE;
3337   }
3338 
3339   /* allocate the matrix space */
3340   if (mallocmatspace){
3341     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3342       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3343       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3344       c->singlemalloc = PETSC_FALSE;
3345       c->free_ij      = PETSC_FALSE;
3346       c->i            = a->i;
3347       c->j            = a->j;
3348       c->parent       = A;
3349       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3350       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3351       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3352     } else {
3353       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3354       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3355       c->singlemalloc = PETSC_TRUE;
3356       c->free_ij      = PETSC_TRUE;
3357       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3358       if (mbs > 0) {
3359 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3360 	if (cpvalues == MAT_COPY_VALUES) {
3361 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3362 	} else {
3363 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3364 	}
3365       }
3366     }
3367   }
3368 
3369   c->roworiented = a->roworiented;
3370   c->nonew       = a->nonew;
3371   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3372   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3373   c->bs2         = a->bs2;
3374   c->mbs         = a->mbs;
3375   c->nbs         = a->nbs;
3376 
3377   if (a->diag) {
3378     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3379       c->diag      = a->diag;
3380       c->free_diag = PETSC_FALSE;
3381     } else {
3382       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3383       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3384       for (i=0; i<mbs; i++) {
3385         c->diag[i] = a->diag[i];
3386       }
3387       c->free_diag = PETSC_TRUE;
3388     }
3389   } else c->diag        = 0;
3390   c->nz                 = a->nz;
3391   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3392   c->solve_work         = 0;
3393   c->mult_work          = 0;
3394   c->free_a             = PETSC_TRUE;
3395   c->free_ij            = PETSC_TRUE;
3396   C->preallocated       = PETSC_TRUE;
3397   C->assembled          = PETSC_TRUE;
3398 
3399   c->compressedrow.use     = a->compressedrow.use;
3400   c->compressedrow.nrows   = a->compressedrow.nrows;
3401   c->compressedrow.checked = a->compressedrow.checked;
3402   if (a->compressedrow.checked && a->compressedrow.use){
3403     i = a->compressedrow.nrows;
3404     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3405     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3406     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3407     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3408   } else {
3409     c->compressedrow.use    = PETSC_FALSE;
3410     c->compressedrow.i      = PETSC_NULL;
3411     c->compressedrow.rindex = PETSC_NULL;
3412   }
3413   C->same_nonzero = A->same_nonzero;
3414   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3415   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3416   PetscFunctionReturn(0);
3417 }
3418 
3419 #undef __FUNCT__
3420 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3421 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3422 {
3423     PetscErrorCode ierr;
3424 
3425   PetscFunctionBegin;
3426   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3427   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3428   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3429   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3430   PetscFunctionReturn(0);
3431 }
3432 
3433 #undef __FUNCT__
3434 #define __FUNCT__ "MatLoad_SeqBAIJ"
3435 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3436 {
3437   Mat_SeqBAIJ    *a;
3438   PetscErrorCode ierr;
3439   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3440   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3441   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3442   PetscInt       *masked,nmask,tmp,bs2,ishift;
3443   PetscMPIInt    size;
3444   int            fd;
3445   PetscScalar    *aa;
3446   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3447 
3448   PetscFunctionBegin;
3449   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3450   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3451   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3452   bs2  = bs*bs;
3453 
3454   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3455   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3456   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3457   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3458   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3459   M = header[1]; N = header[2]; nz = header[3];
3460 
3461   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3462   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3463 
3464   /*
3465      This code adds extra rows to make sure the number of rows is
3466     divisible by the blocksize
3467   */
3468   mbs        = M/bs;
3469   extra_rows = bs - M + bs*(mbs);
3470   if (extra_rows == bs) extra_rows = 0;
3471   else                  mbs++;
3472   if (extra_rows) {
3473     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3474   }
3475 
3476   /* Set global sizes if not already set */
3477   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3478     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3479   } else { /* Check if the matrix global sizes are correct */
3480     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3481     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3482   }
3483 
3484   /* read in row lengths */
3485   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3486   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3487   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3488 
3489   /* read in column indices */
3490   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3491   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3492   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3493 
3494   /* loop over row lengths determining block row lengths */
3495   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3496   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3497   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3498   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3499   rowcount = 0;
3500   nzcount = 0;
3501   for (i=0; i<mbs; i++) {
3502     nmask = 0;
3503     for (j=0; j<bs; j++) {
3504       kmax = rowlengths[rowcount];
3505       for (k=0; k<kmax; k++) {
3506         tmp = jj[nzcount++]/bs;
3507         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3508       }
3509       rowcount++;
3510     }
3511     browlengths[i] += nmask;
3512     /* zero out the mask elements we set */
3513     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3514   }
3515 
3516   /* Do preallocation  */
3517   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3518   a = (Mat_SeqBAIJ*)newmat->data;
3519 
3520   /* set matrix "i" values */
3521   a->i[0] = 0;
3522   for (i=1; i<= mbs; i++) {
3523     a->i[i]      = a->i[i-1] + browlengths[i-1];
3524     a->ilen[i-1] = browlengths[i-1];
3525   }
3526   a->nz         = 0;
3527   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3528 
3529   /* read in nonzero values */
3530   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3531   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3532   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3533 
3534   /* set "a" and "j" values into matrix */
3535   nzcount = 0; jcount = 0;
3536   for (i=0; i<mbs; i++) {
3537     nzcountb = nzcount;
3538     nmask    = 0;
3539     for (j=0; j<bs; j++) {
3540       kmax = rowlengths[i*bs+j];
3541       for (k=0; k<kmax; k++) {
3542         tmp = jj[nzcount++]/bs;
3543 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3544       }
3545     }
3546     /* sort the masked values */
3547     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3548 
3549     /* set "j" values into matrix */
3550     maskcount = 1;
3551     for (j=0; j<nmask; j++) {
3552       a->j[jcount++]  = masked[j];
3553       mask[masked[j]] = maskcount++;
3554     }
3555     /* set "a" values into matrix */
3556     ishift = bs2*a->i[i];
3557     for (j=0; j<bs; j++) {
3558       kmax = rowlengths[i*bs+j];
3559       for (k=0; k<kmax; k++) {
3560         tmp       = jj[nzcountb]/bs ;
3561         block     = mask[tmp] - 1;
3562         point     = jj[nzcountb] - bs*tmp;
3563         idx       = ishift + bs2*block + j + bs*point;
3564         a->a[idx] = (MatScalar)aa[nzcountb++];
3565       }
3566     }
3567     /* zero out the mask elements we set */
3568     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3569   }
3570   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3571 
3572   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3573   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3574   ierr = PetscFree(aa);CHKERRQ(ierr);
3575   ierr = PetscFree(jj);CHKERRQ(ierr);
3576   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3577 
3578   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3579   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3580   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3581   PetscFunctionReturn(0);
3582 }
3583 
3584 #undef __FUNCT__
3585 #define __FUNCT__ "MatCreateSeqBAIJ"
3586 /*@C
3587    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3588    compressed row) format.  For good matrix assembly performance the
3589    user should preallocate the matrix storage by setting the parameter nz
3590    (or the array nnz).  By setting these parameters accurately, performance
3591    during matrix assembly can be increased by more than a factor of 50.
3592 
3593    Collective on MPI_Comm
3594 
3595    Input Parameters:
3596 +  comm - MPI communicator, set to PETSC_COMM_SELF
3597 .  bs - size of block
3598 .  m - number of rows
3599 .  n - number of columns
3600 .  nz - number of nonzero blocks  per block row (same for all rows)
3601 -  nnz - array containing the number of nonzero blocks in the various block rows
3602          (possibly different for each block row) or PETSC_NULL
3603 
3604    Output Parameter:
3605 .  A - the matrix
3606 
3607    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3608    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3609    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3610 
3611    Options Database Keys:
3612 .   -mat_no_unroll - uses code that does not unroll the loops in the
3613                      block calculations (much slower)
3614 .    -mat_block_size - size of the blocks to use
3615 
3616    Level: intermediate
3617 
3618    Notes:
3619    The number of rows and columns must be divisible by blocksize.
3620 
3621    If the nnz parameter is given then the nz parameter is ignored
3622 
3623    A nonzero block is any block that as 1 or more nonzeros in it
3624 
3625    The block AIJ format is fully compatible with standard Fortran 77
3626    storage.  That is, the stored row and column indices can begin at
3627    either one (as in Fortran) or zero.  See the users' manual for details.
3628 
3629    Specify the preallocated storage with either nz or nnz (not both).
3630    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3631    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3632    matrices.
3633 
3634 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3635 @*/
3636 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3637 {
3638   PetscErrorCode ierr;
3639 
3640   PetscFunctionBegin;
3641   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3642   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3643   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3644   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3645   PetscFunctionReturn(0);
3646 }
3647 
3648 #undef __FUNCT__
3649 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3650 /*@C
3651    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3652    per row in the matrix. For good matrix assembly performance the
3653    user should preallocate the matrix storage by setting the parameter nz
3654    (or the array nnz).  By setting these parameters accurately, performance
3655    during matrix assembly can be increased by more than a factor of 50.
3656 
3657    Collective on MPI_Comm
3658 
3659    Input Parameters:
3660 +  A - the matrix
3661 .  bs - size of block
3662 .  nz - number of block nonzeros per block row (same for all rows)
3663 -  nnz - array containing the number of block nonzeros in the various block rows
3664          (possibly different for each block row) or PETSC_NULL
3665 
3666    Options Database Keys:
3667 .   -mat_no_unroll - uses code that does not unroll the loops in the
3668                      block calculations (much slower)
3669 .    -mat_block_size - size of the blocks to use
3670 
3671    Level: intermediate
3672 
3673    Notes:
3674    If the nnz parameter is given then the nz parameter is ignored
3675 
3676    You can call MatGetInfo() to get information on how effective the preallocation was;
3677    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3678    You can also run with the option -info and look for messages with the string
3679    malloc in them to see if additional memory allocation was needed.
3680 
3681    The block AIJ format is fully compatible with standard Fortran 77
3682    storage.  That is, the stored row and column indices can begin at
3683    either one (as in Fortran) or zero.  See the users' manual for details.
3684 
3685    Specify the preallocated storage with either nz or nnz (not both).
3686    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3687    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3688 
3689 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3690 @*/
3691 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3692 {
3693   PetscErrorCode ierr;
3694 
3695   PetscFunctionBegin;
3696   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3697   PetscFunctionReturn(0);
3698 }
3699 
3700 #undef __FUNCT__
3701 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3702 /*@C
3703    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3704    (the default sequential PETSc format).
3705 
3706    Collective on MPI_Comm
3707 
3708    Input Parameters:
3709 +  A - the matrix
3710 .  i - the indices into j for the start of each local row (starts with zero)
3711 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3712 -  v - optional values in the matrix
3713 
3714    Level: developer
3715 
3716 .keywords: matrix, aij, compressed row, sparse
3717 
3718 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3719 @*/
3720 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3721 {
3722   PetscErrorCode ierr;
3723 
3724   PetscFunctionBegin;
3725   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3726   PetscFunctionReturn(0);
3727 }
3728 
3729 
3730 #undef __FUNCT__
3731 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3732 /*@
3733      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3734 
3735      Collective on MPI_Comm
3736 
3737    Input Parameters:
3738 +  comm - must be an MPI communicator of size 1
3739 .  bs - size of block
3740 .  m - number of rows
3741 .  n - number of columns
3742 .  i - row indices
3743 .  j - column indices
3744 -  a - matrix values
3745 
3746    Output Parameter:
3747 .  mat - the matrix
3748 
3749    Level: advanced
3750 
3751    Notes:
3752        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3753     once the matrix is destroyed
3754 
3755        You cannot set new nonzero locations into this matrix, that will generate an error.
3756 
3757        The i and j indices are 0 based
3758 
3759        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3760 
3761 
3762 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3763 
3764 @*/
3765 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3766 {
3767   PetscErrorCode ierr;
3768   PetscInt       ii;
3769   Mat_SeqBAIJ    *baij;
3770 
3771   PetscFunctionBegin;
3772   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3773   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3774 
3775   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3776   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3777   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3778   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3779   baij = (Mat_SeqBAIJ*)(*mat)->data;
3780   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3781   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3782 
3783   baij->i = i;
3784   baij->j = j;
3785   baij->a = a;
3786   baij->singlemalloc = PETSC_FALSE;
3787   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3788   baij->free_a       = PETSC_FALSE;
3789   baij->free_ij       = PETSC_FALSE;
3790 
3791   for (ii=0; ii<m; ii++) {
3792     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3793 #if defined(PETSC_USE_DEBUG)
3794     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3795 #endif
3796   }
3797 #if defined(PETSC_USE_DEBUG)
3798   for (ii=0; ii<baij->i[m]; ii++) {
3799     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3800     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3801   }
3802 #endif
3803 
3804   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3805   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3806   PetscFunctionReturn(0);
3807 }
3808