xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f3fe499b4cc4d64bf04aa4f5e4963dcc4eb56541)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"  /*I   "petscmat.h"  I*/
8 #include "petscblaslapack.h"
9 #include "../src/mat/blockinvert.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
13 /*@
14   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
15 
16   Collective on Mat
17 
18   Input Parameters:
19 . mat - the matrix
20 
21   Level: advanced
22 @*/
23 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
24 {
25   PetscErrorCode ierr,(*f)(Mat);
26 
27   PetscFunctionBegin;
28   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
29   if (!mat->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
30   if (mat->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
31 
32   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
33   if (f) {
34     ierr = (*f)(mat);CHKERRQ(ierr);
35   } else {
36     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
37   }
38   PetscFunctionReturn(0);
39 }
40 
41 EXTERN_C_BEGIN
42 #undef __FUNCT__
43 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
44 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
45 {
46   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
47   PetscErrorCode ierr;
48   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
49   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
50   PetscReal      shift = 0.0;
51 
52   PetscFunctionBegin;
53   if (a->idiagvalid) PetscFunctionReturn(0);
54   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
55   diag_offset = a->diag;
56   if (!a->idiag) {
57     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
58     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs2*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 1:
65       for (i=0; i<mbs; i++) {
66         odiag = v + 1*diag_offset[i];
67         diag[0]  = odiag[0];
68         mdiag[0] = odiag[0];
69         diag[0]  = 1.0 / (diag[0] + shift);
70         diag    += 1;
71         mdiag   += 1;
72       }
73       break;
74     case 2:
75       for (i=0; i<mbs; i++) {
76         odiag   = v + 4*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
79 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
80 	diag    += 4;
81 	mdiag   += 4;
82       }
83       break;
84     case 3:
85       for (i=0; i<mbs; i++) {
86         odiag    = v + 9*diag_offset[i];
87         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
88         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
89         diag[8]  = odiag[8];
90         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
91         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
92         mdiag[8] = odiag[8];
93 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
94 	diag    += 9;
95 	mdiag   += 9;
96       }
97       break;
98     case 4:
99       for (i=0; i<mbs; i++) {
100         odiag  = v + 16*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
104 	diag  += 16;
105 	mdiag += 16;
106       }
107       break;
108     case 5:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 25*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
114 	diag  += 25;
115 	mdiag += 25;
116       }
117       break;
118     case 6:
119       for (i=0; i<mbs; i++) {
120         odiag = v + 36*diag_offset[i];
121         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
122         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
124 	diag  += 36;
125 	mdiag += 36;
126       }
127       break;
128     case 7:
129       for (i=0; i<mbs; i++) {
130         odiag = v + 49*diag_offset[i];
131         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
132         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
133 	ierr   = Kernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
134 	diag  += 49;
135 	mdiag += 49;
136       }
137       break;
138     default:
139       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
140       for (i=0; i<mbs; i++) {
141         odiag = v + bs2*diag_offset[i];
142         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
143         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
144         ierr   = Kernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
145 	diag  += bs2;
146 	mdiag += bs2;
147       }
148       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
149   }
150   a->idiagvalid = PETSC_TRUE;
151   PetscFunctionReturn(0);
152 }
153 EXTERN_C_END
154 
155 #undef __FUNCT__
156 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
157 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
158 {
159   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
160   PetscScalar        *x,x1,s1;
161   const PetscScalar  *b;
162   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
163   PetscErrorCode     ierr;
164   PetscInt           m = a->mbs,i,i2,nz,j;
165   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
166 
167   PetscFunctionBegin;
168   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
169   its = its*lits;
170   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
171   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
172   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
173   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
174   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
175 
176   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
177 
178   diag  = a->diag;
179   idiag = a->idiag;
180   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
181   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
182 
183   if (flag & SOR_ZERO_INITIAL_GUESS) {
184     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
185       x[0] = b[0]*idiag[0];
186       i2     = 1;
187       idiag += 1;
188       for (i=1; i<m; i++) {
189         v     = aa + ai[i];
190         vi    = aj + ai[i];
191         nz    = diag[i] - ai[i];
192         s1    = b[i2];
193         for (j=0; j<nz; j++) {
194           s1 -= v[j]*x[vi[j]];
195         }
196         x[i2]   = idiag[0]*s1;
197         idiag   += 1;
198         i2      += 1;
199       }
200       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
201       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
202     }
203     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
204         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
205       i2    = 0;
206       mdiag = a->idiag+a->mbs;
207       for (i=0; i<m; i++) {
208         x1      = x[i2];
209         x[i2]   = mdiag[0]*x1;
210         mdiag  += 1;
211         i2     += 1;
212       }
213       ierr = PetscLogFlops(m);CHKERRQ(ierr);
214     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
215       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
216     }
217     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
218       idiag   = a->idiag+a->mbs - 1;
219       i2      = m - 1;
220       x1      = x[i2];
221       x[i2]   = idiag[0]*x1;
222       idiag -= 1;
223       i2    -= 1;
224       for (i=m-2; i>=0; i--) {
225         v     = aa + (diag[i]+1);
226         vi    = aj + diag[i] + 1;
227         nz    = ai[i+1] - diag[i] - 1;
228         s1    = x[i2];
229         for (j=0; j<nz; j++) {
230           s1 -= v[j]*x[vi[j]];
231         }
232         x[i2]   = idiag[0]*s1;
233         idiag   -= 1;
234         i2      -= 1;
235       }
236       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
237     }
238   } else {
239     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
240   }
241   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
243   PetscFunctionReturn(0);
244 }
245 
246 #undef __FUNCT__
247 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
248 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
249 {
250   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
251   PetscScalar        *x,x1,x2,s1,s2;
252   const PetscScalar  *b;
253   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
254   PetscErrorCode     ierr;
255   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
256   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
257 
258   PetscFunctionBegin;
259   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
260   its = its*lits;
261   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
262   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
263   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
264   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
265   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
266 
267   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
268 
269   diag  = a->diag;
270   idiag = a->idiag;
271   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
272   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
273 
274   if (flag & SOR_ZERO_INITIAL_GUESS) {
275     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
276       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
277       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
278       i2     = 2;
279       idiag += 4;
280       for (i=1; i<m; i++) {
281 	v     = aa + 4*ai[i];
282 	vi    = aj + ai[i];
283 	nz    = diag[i] - ai[i];
284 	s1    = b[i2]; s2 = b[i2+1];
285         for (j=0; j<nz; j++) {
286 	  idx  = 2*vi[j];
287           it   = 4*j;
288 	  x1   = x[idx]; x2 = x[1+idx];
289 	  s1  -= v[it]*x1 + v[it+2]*x2;
290 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
291 	}
292 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
293 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
294         idiag   += 4;
295         i2      += 2;
296       }
297       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
298       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
299     }
300     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
301         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
302       i2    = 0;
303       mdiag = a->idiag+4*a->mbs;
304       for (i=0; i<m; i++) {
305         x1      = x[i2]; x2 = x[i2+1];
306         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
307         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
308         mdiag  += 4;
309         i2     += 2;
310       }
311       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
312     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
313       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
314     }
315     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
316       idiag   = a->idiag+4*a->mbs - 4;
317       i2      = 2*m - 2;
318       x1      = x[i2]; x2 = x[i2+1];
319       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
320       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
321       idiag -= 4;
322       i2    -= 2;
323       for (i=m-2; i>=0; i--) {
324 	v     = aa + 4*(diag[i]+1);
325 	vi    = aj + diag[i] + 1;
326 	nz    = ai[i+1] - diag[i] - 1;
327 	s1    = x[i2]; s2 = x[i2+1];
328         for (j=0; j<nz; j++) {
329  	  idx  = 2*vi[j];
330           it   = 4*j;
331 	  x1   = x[idx]; x2 = x[1+idx];
332 	  s1  -= v[it]*x1 + v[it+2]*x2;
333 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
334 	}
335 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
336 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
337         idiag   -= 4;
338         i2      -= 2;
339       }
340       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
341     }
342   } else {
343     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
344   }
345   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
346   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
347   PetscFunctionReturn(0);
348 }
349 
350 #undef __FUNCT__
351 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
352 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
353 {
354   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
355   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
356   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
357   const PetscScalar  *b;
358   PetscErrorCode     ierr;
359   PetscInt           m = a->mbs,i,i2,nz,idx;
360   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
361 
362   PetscFunctionBegin;
363   its = its*lits;
364   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
365   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
366   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
367   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
368   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
369   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
370 
371   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
372 
373   diag  = a->diag;
374   idiag = a->idiag;
375   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
376   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
377 
378   if (flag & SOR_ZERO_INITIAL_GUESS) {
379     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
380       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
381       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
382       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
383       i2     = 3;
384       idiag += 9;
385       for (i=1; i<m; i++) {
386         v     = aa + 9*ai[i];
387         vi    = aj + ai[i];
388         nz    = diag[i] - ai[i];
389         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
390         while (nz--) {
391           idx  = 3*(*vi++);
392           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
393           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
394           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
395           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
396           v   += 9;
397         }
398         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
399         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
400         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
401         idiag   += 9;
402         i2      += 3;
403       }
404       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
405       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
406     }
407     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
408         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
409       i2    = 0;
410       mdiag = a->idiag+9*a->mbs;
411       for (i=0; i<m; i++) {
412         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
413         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
414         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
415         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
416         mdiag  += 9;
417         i2     += 3;
418       }
419       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
420     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
421       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
422     }
423     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
424       idiag   = a->idiag+9*a->mbs - 9;
425       i2      = 3*m - 3;
426       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
427       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
428       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
429       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
430       idiag -= 9;
431       i2    -= 3;
432       for (i=m-2; i>=0; i--) {
433         v     = aa + 9*(diag[i]+1);
434         vi    = aj + diag[i] + 1;
435         nz    = ai[i+1] - diag[i] - 1;
436         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
437         while (nz--) {
438           idx  = 3*(*vi++);
439           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
440           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
441           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
442           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
443           v   += 9;
444         }
445         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
446         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
447         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
448         idiag   -= 9;
449         i2      -= 3;
450       }
451       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
452     }
453   } else {
454     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
455   }
456   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
457   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
458   PetscFunctionReturn(0);
459 }
460 
461 #undef __FUNCT__
462 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
463 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
464 {
465   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
466   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
467   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
468   const PetscScalar  *b;
469   PetscErrorCode     ierr;
470   PetscInt           m = a->mbs,i,i2,nz,idx;
471   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
472 
473   PetscFunctionBegin;
474   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
475   its = its*lits;
476   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
477   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
478   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
479   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
480   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
481 
482   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
483 
484   diag  = a->diag;
485   idiag = a->idiag;
486   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
487   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
488 
489   if (flag & SOR_ZERO_INITIAL_GUESS) {
490     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
491       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
492       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
493       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
494       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
495       i2     = 4;
496       idiag += 16;
497       for (i=1; i<m; i++) {
498 	v     = aa + 16*ai[i];
499 	vi    = aj + ai[i];
500 	nz    = diag[i] - ai[i];
501 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
502 	while (nz--) {
503 	  idx  = 4*(*vi++);
504 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
505 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
506 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
507 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
508 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
509 	  v   += 16;
510 	}
511 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
512 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
513 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
514 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
515         idiag   += 16;
516         i2      += 4;
517       }
518       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
519       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
520     }
521     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
522         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
523       i2    = 0;
524       mdiag = a->idiag+16*a->mbs;
525       for (i=0; i<m; i++) {
526         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
527         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
528         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
529         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
530         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
531         mdiag  += 16;
532         i2     += 4;
533       }
534       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
535     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
536       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
537     }
538     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
539       idiag   = a->idiag+16*a->mbs - 16;
540       i2      = 4*m - 4;
541       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
542       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
543       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
544       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
545       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
546       idiag -= 16;
547       i2    -= 4;
548       for (i=m-2; i>=0; i--) {
549 	v     = aa + 16*(diag[i]+1);
550 	vi    = aj + diag[i] + 1;
551 	nz    = ai[i+1] - diag[i] - 1;
552 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
553 	while (nz--) {
554 	  idx  = 4*(*vi++);
555 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
556 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
557 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
558 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
559 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
560 	  v   += 16;
561 	}
562 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
563 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
564 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
565 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
566         idiag   -= 16;
567         i2      -= 4;
568       }
569       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
570     }
571   } else {
572     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
573   }
574   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
575   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
576   PetscFunctionReturn(0);
577 }
578 
579 #undef __FUNCT__
580 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
581 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
582 {
583   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
584   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
585   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
586   const PetscScalar  *b;
587   PetscErrorCode     ierr;
588   PetscInt           m = a->mbs,i,i2,nz,idx;
589   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
590 
591   PetscFunctionBegin;
592   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
593   its = its*lits;
594   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
595   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
596   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
597   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
598   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
599 
600   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
601 
602   diag  = a->diag;
603   idiag = a->idiag;
604   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
605   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
606 
607   if (flag & SOR_ZERO_INITIAL_GUESS) {
608     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
609       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
610       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
611       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
612       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
613       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
614       i2     = 5;
615       idiag += 25;
616       for (i=1; i<m; i++) {
617 	v     = aa + 25*ai[i];
618 	vi    = aj + ai[i];
619 	nz    = diag[i] - ai[i];
620 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
621 	while (nz--) {
622 	  idx  = 5*(*vi++);
623 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
624 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
625 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
626 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
627 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
628 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
629 	  v   += 25;
630 	}
631 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
632 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
633 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
634 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
635 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
636         idiag   += 25;
637         i2      += 5;
638       }
639       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
640       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
641     }
642     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
643         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
644       i2    = 0;
645       mdiag = a->idiag+25*a->mbs;
646       for (i=0; i<m; i++) {
647         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
648         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
649         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
650         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
651         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
652         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
653         mdiag  += 25;
654         i2     += 5;
655       }
656       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
657     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
658       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
659     }
660     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
661       idiag   = a->idiag+25*a->mbs - 25;
662       i2      = 5*m - 5;
663       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
664       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
665       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
666       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
667       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
668       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
669       idiag -= 25;
670       i2    -= 5;
671       for (i=m-2; i>=0; i--) {
672 	v     = aa + 25*(diag[i]+1);
673 	vi    = aj + diag[i] + 1;
674 	nz    = ai[i+1] - diag[i] - 1;
675 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
676 	while (nz--) {
677 	  idx  = 5*(*vi++);
678 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
679 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
680 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
681 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
682 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
683 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
684 	  v   += 25;
685 	}
686 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
687 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
688 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
689 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
690 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
691         idiag   -= 25;
692         i2      -= 5;
693       }
694       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
695     }
696   } else {
697     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
698   }
699   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
700   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
701   PetscFunctionReturn(0);
702 }
703 
704 #undef __FUNCT__
705 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
706 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
707 {
708   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
709   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
710   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
711   const PetscScalar  *b;
712   PetscErrorCode     ierr;
713   PetscInt           m = a->mbs,i,i2,nz,idx;
714   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
715 
716   PetscFunctionBegin;
717   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
718   its = its*lits;
719   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
720   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
721   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
722   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
723   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
724 
725   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
726 
727   diag  = a->diag;
728   idiag = a->idiag;
729   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
730   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
731 
732   if (flag & SOR_ZERO_INITIAL_GUESS) {
733     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
734       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
735       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
736       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
737       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
738       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
739       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
740       i2     = 6;
741       idiag += 36;
742       for (i=1; i<m; i++) {
743         v     = aa + 36*ai[i];
744         vi    = aj + ai[i];
745         nz    = diag[i] - ai[i];
746         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
747         while (nz--) {
748           idx  = 6*(*vi++);
749           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
750           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
751           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
752           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
753           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
754           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
755           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
756           v   += 36;
757         }
758         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
759         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
760         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
761         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
762         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
763         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
764         idiag   += 36;
765         i2      += 6;
766       }
767       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
768       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
769     }
770     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
771         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
772       i2    = 0;
773       mdiag = a->idiag+36*a->mbs;
774       for (i=0; i<m; i++) {
775         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
776         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
777         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
778         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
779         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
780         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
781         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
782         mdiag  += 36;
783         i2     += 6;
784       }
785       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
786     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
787       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
788     }
789     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
790       idiag   = a->idiag+36*a->mbs - 36;
791       i2      = 6*m - 6;
792       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
793       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
794       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
795       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
796       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
797       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
798       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
799       idiag -= 36;
800       i2    -= 6;
801       for (i=m-2; i>=0; i--) {
802         v     = aa + 36*(diag[i]+1);
803         vi    = aj + diag[i] + 1;
804         nz    = ai[i+1] - diag[i] - 1;
805         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
806         while (nz--) {
807           idx  = 6*(*vi++);
808           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
809           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
810           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
811           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
812           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
813           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
814           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
815           v   += 36;
816         }
817         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
818         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
819         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
820         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
821         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
822         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
823         idiag   -= 36;
824         i2      -= 6;
825       }
826       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
827     }
828   } else {
829     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
830   }
831   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
832   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
833   PetscFunctionReturn(0);
834 }
835 
836 #undef __FUNCT__
837 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
838 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
839 {
840   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
841   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
842   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
843   const PetscScalar  *b;
844   PetscErrorCode     ierr;
845   PetscInt           m = a->mbs,i,i2,nz,idx;
846   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
847 
848   PetscFunctionBegin;
849   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
850   its = its*lits;
851   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
852   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
853   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
854   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
855   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
856 
857   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
858 
859   diag  = a->diag;
860   idiag = a->idiag;
861   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
862   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
863 
864   if (flag & SOR_ZERO_INITIAL_GUESS) {
865     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
866       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
867       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
868       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
869       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
870       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
871       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
872       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
873       i2     = 7;
874       idiag += 49;
875       for (i=1; i<m; i++) {
876         v     = aa + 49*ai[i];
877         vi    = aj + ai[i];
878         nz    = diag[i] - ai[i];
879         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
880         while (nz--) {
881           idx  = 7*(*vi++);
882           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
883           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
884           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
885           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
886           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
887           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
888           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
889           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
890           v   += 49;
891         }
892         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
893         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
894         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
895         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
896         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
897         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
898         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
899         idiag   += 49;
900         i2      += 7;
901       }
902       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
903       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
904     }
905     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
906         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
907       i2    = 0;
908       mdiag = a->idiag+49*a->mbs;
909       for (i=0; i<m; i++) {
910         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
911         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
912         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
913         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
914         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
915         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
916         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
917         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
918         mdiag  += 49;
919         i2     += 7;
920       }
921       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
922     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
923       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
924     }
925     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
926       idiag   = a->idiag+49*a->mbs - 49;
927       i2      = 7*m - 7;
928       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
929       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
930       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
931       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
932       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
933       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
934       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
935       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
936       idiag -= 49;
937       i2    -= 7;
938       for (i=m-2; i>=0; i--) {
939         v     = aa + 49*(diag[i]+1);
940         vi    = aj + diag[i] + 1;
941         nz    = ai[i+1] - diag[i] - 1;
942         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
943         while (nz--) {
944           idx  = 7*(*vi++);
945           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
946           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
947           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
948           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
949           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
950           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
951           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
952           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
953           v   += 49;
954         }
955         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
956         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
957         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
958         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
959         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
960         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
961         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
962         idiag   -= 49;
963         i2      -= 7;
964       }
965       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
966     }
967   } else {
968     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
969   }
970   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
971   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
977 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
978 {
979   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
980   PetscScalar        *x,*work,*w,*workt;
981   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
982   const PetscScalar  *b;
983   PetscErrorCode     ierr;
984   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
985   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
986 
987   PetscFunctionBegin;
988   its = its*lits;
989   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
990   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
991   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
992   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
993   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
994   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
995 
996   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
997 
998   diag  = a->diag;
999   idiag = a->idiag;
1000   if (!a->mult_work) {
1001     k    = PetscMax(A->rmap->n,A->cmap->n);
1002     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
1003   }
1004   work = a->mult_work;
1005   if (!a->sor_work) {
1006     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
1007   }
1008   w = a->sor_work;
1009 
1010   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1011   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
1012 
1013   if (flag & SOR_ZERO_INITIAL_GUESS) {
1014     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
1015       Kernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
1016       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
1017       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
1018       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
1019       i2     = bs;
1020       idiag += bs2;
1021       for (i=1; i<m; i++) {
1022         v     = aa + bs2*ai[i];
1023         vi    = aj + ai[i];
1024         nz    = diag[i] - ai[i];
1025 
1026         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1027 	/* copy all rows of x that are needed into contiguous space */
1028         workt = work;
1029         for (j=0; j<nz; j++) {
1030           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1031           workt += bs;
1032         }
1033         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1034        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1035         while (nz--) {
1036           idx  = N*(*vi++);
1037           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1038           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1039           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1040           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1041           v   += N2;
1042 	  } */
1043 
1044         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1045         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1046         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1047         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1048 
1049         idiag   += bs2;
1050         i2      += bs;
1051       }
1052       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1053       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1054     }
1055     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1056         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1057       i2    = 0;
1058       mdiag = a->idiag+bs2*a->mbs;
1059       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1060       for (i=0; i<m; i++) {
1061         Kernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1062         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1063         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1064         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1065         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1066 
1067         mdiag  += bs2;
1068         i2     += bs;
1069       }
1070       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1071     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1072       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1073     }
1074     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
1075       idiag   = a->idiag+bs2*a->mbs - bs2;
1076       i2      = bs*m - bs;
1077       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1078       Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1079       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1080       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1081       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1082       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1083       idiag -= bs2;
1084       i2    -= bs;
1085       for (i=m-2; i>=0; i--) {
1086         v     = aa + bs2*(diag[i]+1);
1087         vi    = aj + diag[i] + 1;
1088         nz    = ai[i+1] - diag[i] - 1;
1089 
1090         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1091 	/* copy all rows of x that are needed into contiguous space */
1092         workt = work;
1093         for (j=0; j<nz; j++) {
1094           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1095           workt += bs;
1096         }
1097         Kernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1098         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1099         while (nz--) {
1100           idx  = N*(*vi++);
1101           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1102           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1103           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1104           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1105           v   += N2;
1106 	  } */
1107 
1108         Kernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1109         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1110         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1111         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1112         idiag   -= bs2;
1113         i2      -= bs;
1114       }
1115       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1116     }
1117   } else {
1118     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1119   }
1120   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1121   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1122   PetscFunctionReturn(0);
1123 }
1124 
1125 /*
1126     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1127 */
1128 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1129 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1130 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1131 #define matsetvaluesblocked4_ matsetvaluesblocked4
1132 #endif
1133 
1134 EXTERN_C_BEGIN
1135 #undef __FUNCT__
1136 #define __FUNCT__ "matsetvaluesblocked4_"
1137 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1138 {
1139   Mat               A = *AA;
1140   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1141   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1142   PetscInt          *ai=a->i,*ailen=a->ilen;
1143   PetscInt          *aj=a->j,stepval,lastcol = -1;
1144   const PetscScalar *value = v;
1145   MatScalar         *ap,*aa = a->a,*bap;
1146 
1147   PetscFunctionBegin;
1148   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1149   stepval = (n-1)*4;
1150   for (k=0; k<m; k++) { /* loop over added rows */
1151     row  = im[k];
1152     rp   = aj + ai[row];
1153     ap   = aa + 16*ai[row];
1154     nrow = ailen[row];
1155     low  = 0;
1156     high = nrow;
1157     for (l=0; l<n; l++) { /* loop over added columns */
1158       col = in[l];
1159       if (col <= lastcol) low = 0; else high = nrow;
1160       lastcol = col;
1161       value = v + k*(stepval+4 + l)*4;
1162       while (high-low > 7) {
1163         t = (low+high)/2;
1164         if (rp[t] > col) high = t;
1165         else             low  = t;
1166       }
1167       for (i=low; i<high; i++) {
1168         if (rp[i] > col) break;
1169         if (rp[i] == col) {
1170           bap  = ap +  16*i;
1171           for (ii=0; ii<4; ii++,value+=stepval) {
1172             for (jj=ii; jj<16; jj+=4) {
1173               bap[jj] += *value++;
1174             }
1175           }
1176           goto noinsert2;
1177         }
1178       }
1179       N = nrow++ - 1;
1180       high++; /* added new column index thus must search to one higher than before */
1181       /* shift up all the later entries in this row */
1182       for (ii=N; ii>=i; ii--) {
1183         rp[ii+1] = rp[ii];
1184         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1185       }
1186       if (N >= i) {
1187         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1188       }
1189       rp[i] = col;
1190       bap   = ap +  16*i;
1191       for (ii=0; ii<4; ii++,value+=stepval) {
1192         for (jj=ii; jj<16; jj+=4) {
1193           bap[jj] = *value++;
1194         }
1195       }
1196       noinsert2:;
1197       low = i;
1198     }
1199     ailen[row] = nrow;
1200   }
1201   PetscFunctionReturnVoid();
1202 }
1203 EXTERN_C_END
1204 
1205 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1206 #define matsetvalues4_ MATSETVALUES4
1207 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1208 #define matsetvalues4_ matsetvalues4
1209 #endif
1210 
1211 EXTERN_C_BEGIN
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatSetValues4_"
1214 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1215 {
1216   Mat         A = *AA;
1217   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1218   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1219   PetscInt    *ai=a->i,*ailen=a->ilen;
1220   PetscInt    *aj=a->j,brow,bcol;
1221   PetscInt    ridx,cidx,lastcol = -1;
1222   MatScalar   *ap,value,*aa=a->a,*bap;
1223 
1224   PetscFunctionBegin;
1225   for (k=0; k<m; k++) { /* loop over added rows */
1226     row  = im[k]; brow = row/4;
1227     rp   = aj + ai[brow];
1228     ap   = aa + 16*ai[brow];
1229     nrow = ailen[brow];
1230     low  = 0;
1231     high = nrow;
1232     for (l=0; l<n; l++) { /* loop over added columns */
1233       col = in[l]; bcol = col/4;
1234       ridx = row % 4; cidx = col % 4;
1235       value = v[l + k*n];
1236       if (col <= lastcol) low = 0; else high = nrow;
1237       lastcol = col;
1238       while (high-low > 7) {
1239         t = (low+high)/2;
1240         if (rp[t] > bcol) high = t;
1241         else              low  = t;
1242       }
1243       for (i=low; i<high; i++) {
1244         if (rp[i] > bcol) break;
1245         if (rp[i] == bcol) {
1246           bap  = ap +  16*i + 4*cidx + ridx;
1247           *bap += value;
1248           goto noinsert1;
1249         }
1250       }
1251       N = nrow++ - 1;
1252       high++; /* added new column thus must search to one higher than before */
1253       /* shift up all the later entries in this row */
1254       for (ii=N; ii>=i; ii--) {
1255         rp[ii+1] = rp[ii];
1256         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1257       }
1258       if (N>=i) {
1259         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1260       }
1261       rp[i]                    = bcol;
1262       ap[16*i + 4*cidx + ridx] = value;
1263       noinsert1:;
1264       low = i;
1265     }
1266     ailen[brow] = nrow;
1267   }
1268   PetscFunctionReturnVoid();
1269 }
1270 EXTERN_C_END
1271 
1272 /*
1273      Checks for missing diagonals
1274 */
1275 #undef __FUNCT__
1276 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1277 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1278 {
1279   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1280   PetscErrorCode ierr;
1281   PetscInt       *diag,*jj = a->j,i;
1282 
1283   PetscFunctionBegin;
1284   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1285   *missing = PETSC_FALSE;
1286   if (A->rmap->n > 0 && !jj) {
1287     *missing  = PETSC_TRUE;
1288     if (d) *d = 0;
1289     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1290   } else {
1291     diag     = a->diag;
1292     for (i=0; i<a->mbs; i++) {
1293       if (jj[diag[i]] != i) {
1294         *missing  = PETSC_TRUE;
1295         if (d) *d = i;
1296         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1297       }
1298     }
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1305 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1306 {
1307   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309   PetscInt       i,j,m = a->mbs;
1310 
1311   PetscFunctionBegin;
1312   if (!a->diag) {
1313     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1314     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1315     a->free_diag = PETSC_TRUE;
1316   }
1317   for (i=0; i<m; i++) {
1318     a->diag[i] = a->i[i+1];
1319     for (j=a->i[i]; j<a->i[i+1]; j++) {
1320       if (a->j[j] == i) {
1321         a->diag[i] = j;
1322         break;
1323       }
1324     }
1325   }
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 
1330 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1331 
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1334 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1335 {
1336   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1337   PetscErrorCode ierr;
1338   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,k,l,cnt;
1339   PetscInt       *tia, *tja;
1340 
1341   PetscFunctionBegin;
1342   *nn = n;
1343   if (!ia) PetscFunctionReturn(0);
1344   if (symmetric) {
1345     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1346     nz   = tia[n];
1347   } else {
1348     tia = a->i; tja = a->j;
1349   }
1350 
1351   if (!blockcompressed && bs > 1) {
1352     (*nn) *= bs;
1353     /* malloc & create the natural set of indices */
1354     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1355     if (n) {
1356       (*ia)[0] = 0;
1357       for (j=1; j<bs; j++) {
1358         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1359       }
1360     }
1361 
1362     for (i=1; i<n; i++) {
1363       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1364       for (j=1; j<bs; j++) {
1365         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1366       }
1367     }
1368     if (n) {
1369       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1370     }
1371 
1372     if (ja) {
1373       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1374       cnt = 0;
1375       for (i=0; i<n; i++) {
1376         for (j=0; j<bs; j++) {
1377           for (k=tia[i]; k<tia[i+1]; k++) {
1378             for (l=0; l<bs; l++) {
1379               (*ja)[cnt++] = bs*tja[k] + l;
1380 	    }
1381           }
1382         }
1383       }
1384     }
1385 
1386     n     *= bs;
1387     nz *= bs*bs;
1388     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1389       ierr = PetscFree(tia);CHKERRQ(ierr);
1390       ierr = PetscFree(tja);CHKERRQ(ierr);
1391     }
1392   } else if (oshift == 1) {
1393     if (symmetric) {
1394       PetscInt nz = tia[A->rmap->n/bs];
1395       /*  add 1 to i and j indices */
1396       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1397       *ia = tia;
1398       if (ja) {
1399 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1400         *ja = tja;
1401       }
1402     } else {
1403       PetscInt nz = a->i[A->rmap->n/bs];
1404       /* malloc space and  add 1 to i and j indices */
1405       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1406       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1407       if (ja) {
1408 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1409 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1410       }
1411     }
1412   } else {
1413     *ia = tia;
1414     if (ja) *ja = tja;
1415   }
1416 
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1422 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1423 {
1424   PetscErrorCode ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!ia) PetscFunctionReturn(0);
1428   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1429     ierr = PetscFree(*ia);CHKERRQ(ierr);
1430     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1437 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441 
1442   PetscFunctionBegin;
1443 #if defined(PETSC_USE_LOG)
1444   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1445 #endif
1446   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1447   if (a->row) {
1448     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1449   }
1450   if (a->col) {
1451     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1452   }
1453   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1454   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1455   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1456   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1457   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1458   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1459   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1460   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1461   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1462   if (a->compressedrow.use){ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);}
1463 
1464   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1465   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1466   ierr = PetscFree(a);CHKERRQ(ierr);
1467 
1468   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1469   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1470   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1471   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1472   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1473   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1474   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1475   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1476   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1482 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486 
1487   PetscFunctionBegin;
1488   switch (op) {
1489   case MAT_ROW_ORIENTED:
1490     a->roworiented    = flg;
1491     break;
1492   case MAT_KEEP_NONZERO_PATTERN:
1493     a->keepnonzeropattern = flg;
1494     break;
1495   case MAT_NEW_NONZERO_LOCATIONS:
1496     a->nonew          = (flg ? 0 : 1);
1497     break;
1498   case MAT_NEW_NONZERO_LOCATION_ERR:
1499     a->nonew          = (flg ? -1 : 0);
1500     break;
1501   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1502     a->nonew          = (flg ? -2 : 0);
1503     break;
1504   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1505     a->nounused       = (flg ? -1 : 0);
1506     break;
1507   case MAT_NEW_DIAGONALS:
1508   case MAT_IGNORE_OFF_PROC_ENTRIES:
1509   case MAT_USE_HASH_TABLE:
1510     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1511     break;
1512   case MAT_SYMMETRIC:
1513   case MAT_STRUCTURALLY_SYMMETRIC:
1514   case MAT_HERMITIAN:
1515   case MAT_SYMMETRY_ETERNAL:
1516     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1517     break;
1518   default:
1519     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1520   }
1521   PetscFunctionReturn(0);
1522 }
1523 
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1526 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1527 {
1528   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1529   PetscErrorCode ierr;
1530   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1531   MatScalar      *aa,*aa_i;
1532   PetscScalar    *v_i;
1533 
1534   PetscFunctionBegin;
1535   bs  = A->rmap->bs;
1536   ai  = a->i;
1537   aj  = a->j;
1538   aa  = a->a;
1539   bs2 = a->bs2;
1540 
1541   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1542 
1543   bn  = row/bs;   /* Block number */
1544   bp  = row % bs; /* Block Position */
1545   M   = ai[bn+1] - ai[bn];
1546   *nz = bs*M;
1547 
1548   if (v) {
1549     *v = 0;
1550     if (*nz) {
1551       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1552       for (i=0; i<M; i++) { /* for each block in the block row */
1553         v_i  = *v + i*bs;
1554         aa_i = aa + bs2*(ai[bn] + i);
1555         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1556       }
1557     }
1558   }
1559 
1560   if (idx) {
1561     *idx = 0;
1562     if (*nz) {
1563       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1564       for (i=0; i<M; i++) { /* for each block in the block row */
1565         idx_i = *idx + i*bs;
1566         itmp  = bs*aj[ai[bn] + i];
1567         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1568       }
1569     }
1570   }
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1576 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1577 {
1578   PetscErrorCode ierr;
1579 
1580   PetscFunctionBegin;
1581   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1582   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1587 
1588 #undef __FUNCT__
1589 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1590 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1591 {
1592   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1593   Mat            C;
1594   PetscErrorCode ierr;
1595   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1596   PetscInt       *rows,*cols,bs2=a->bs2;
1597   MatScalar      *array;
1598 
1599   PetscFunctionBegin;
1600   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1601   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1602     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1603     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1604 
1605     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1606     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1607     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1608     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1609     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1610     ierr = PetscFree(col);CHKERRQ(ierr);
1611   } else {
1612     C = *B;
1613   }
1614 
1615   array = a->a;
1616   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1617   for (i=0; i<mbs; i++) {
1618     cols[0] = i*bs;
1619     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1620     len = ai[i+1] - ai[i];
1621     for (j=0; j<len; j++) {
1622       rows[0] = (*aj++)*bs;
1623       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1624       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1625       array += bs2;
1626     }
1627   }
1628   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1629 
1630   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1631   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1632 
1633   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1634     *B = C;
1635   } else {
1636     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1637   }
1638   PetscFunctionReturn(0);
1639 }
1640 
1641 #undef __FUNCT__
1642 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1643 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1644 {
1645   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1646   PetscErrorCode ierr;
1647   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1648   int            fd;
1649   PetscScalar    *aa;
1650   FILE           *file;
1651 
1652   PetscFunctionBegin;
1653   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1654   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1655   col_lens[0] = MAT_FILE_CLASSID;
1656 
1657   col_lens[1] = A->rmap->N;
1658   col_lens[2] = A->cmap->n;
1659   col_lens[3] = a->nz*bs2;
1660 
1661   /* store lengths of each row and write (including header) to file */
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1666     }
1667   }
1668   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1669   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1670 
1671   /* store column indices (zero start index) */
1672   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1673   count = 0;
1674   for (i=0; i<a->mbs; i++) {
1675     for (j=0; j<bs; j++) {
1676       for (k=a->i[i]; k<a->i[i+1]; k++) {
1677         for (l=0; l<bs; l++) {
1678           jj[count++] = bs*a->j[k] + l;
1679         }
1680       }
1681     }
1682   }
1683   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1684   ierr = PetscFree(jj);CHKERRQ(ierr);
1685 
1686   /* store nonzero values */
1687   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1688   count = 0;
1689   for (i=0; i<a->mbs; i++) {
1690     for (j=0; j<bs; j++) {
1691       for (k=a->i[i]; k<a->i[i+1]; k++) {
1692         for (l=0; l<bs; l++) {
1693           aa[count++] = a->a[bs2*k + l*bs + j];
1694         }
1695       }
1696     }
1697   }
1698   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1699   ierr = PetscFree(aa);CHKERRQ(ierr);
1700 
1701   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1702   if (file) {
1703     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 #undef __FUNCT__
1709 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1710 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1711 {
1712   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1713   PetscErrorCode    ierr;
1714   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1715   PetscViewerFormat format;
1716 
1717   PetscFunctionBegin;
1718   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1719   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1720     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1721   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1722     Mat aij;
1723     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1724     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1725     ierr = MatDestroy(aij);CHKERRQ(ierr);
1726   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1727      PetscFunctionReturn(0);
1728   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1730     for (i=0; i<a->mbs; i++) {
1731       for (j=0; j<bs; j++) {
1732         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1733         for (k=a->i[i]; k<a->i[i+1]; k++) {
1734           for (l=0; l<bs; l++) {
1735 #if defined(PETSC_USE_COMPLEX)
1736             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1738                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1739             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1741                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1742             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1743               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1744             }
1745 #else
1746             if (a->a[bs2*k + l*bs + j] != 0.0) {
1747               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1748             }
1749 #endif
1750           }
1751         }
1752         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1753       }
1754     }
1755     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1756   } else {
1757     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1758     for (i=0; i<a->mbs; i++) {
1759       for (j=0; j<bs; j++) {
1760         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1761         for (k=a->i[i]; k<a->i[i+1]; k++) {
1762           for (l=0; l<bs; l++) {
1763 #if defined(PETSC_USE_COMPLEX)
1764             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1765               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1766                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1767             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1768               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1769                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1770             } else {
1771               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1772             }
1773 #else
1774             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1775 #endif
1776           }
1777         }
1778         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1779       }
1780     }
1781     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1782   }
1783   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1789 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1790 {
1791   Mat            A = (Mat) Aa;
1792   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1793   PetscErrorCode ierr;
1794   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1795   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1796   MatScalar      *aa;
1797   PetscViewer    viewer;
1798 
1799   PetscFunctionBegin;
1800 
1801   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1802   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1803 
1804   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1805 
1806   /* loop over matrix elements drawing boxes */
1807   color = PETSC_DRAW_BLUE;
1808   for (i=0,row=0; i<mbs; i++,row+=bs) {
1809     for (j=a->i[i]; j<a->i[i+1]; j++) {
1810       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1811       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1812       aa = a->a + j*bs2;
1813       for (k=0; k<bs; k++) {
1814         for (l=0; l<bs; l++) {
1815           if (PetscRealPart(*aa++) >=  0.) continue;
1816           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1817         }
1818       }
1819     }
1820   }
1821   color = PETSC_DRAW_CYAN;
1822   for (i=0,row=0; i<mbs; i++,row+=bs) {
1823     for (j=a->i[i]; j<a->i[i+1]; j++) {
1824       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1825       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1826       aa = a->a + j*bs2;
1827       for (k=0; k<bs; k++) {
1828         for (l=0; l<bs; l++) {
1829           if (PetscRealPart(*aa++) != 0.) continue;
1830           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1831         }
1832       }
1833     }
1834   }
1835 
1836   color = PETSC_DRAW_RED;
1837   for (i=0,row=0; i<mbs; i++,row+=bs) {
1838     for (j=a->i[i]; j<a->i[i+1]; j++) {
1839       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1840       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1841       aa = a->a + j*bs2;
1842       for (k=0; k<bs; k++) {
1843         for (l=0; l<bs; l++) {
1844           if (PetscRealPart(*aa++) <= 0.) continue;
1845           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1846         }
1847       }
1848     }
1849   }
1850   PetscFunctionReturn(0);
1851 }
1852 
1853 #undef __FUNCT__
1854 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1855 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1856 {
1857   PetscErrorCode ierr;
1858   PetscReal      xl,yl,xr,yr,w,h;
1859   PetscDraw      draw;
1860   PetscTruth     isnull;
1861 
1862   PetscFunctionBegin;
1863 
1864   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1865   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1866 
1867   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1868   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1869   xr += w;    yr += h;  xl = -w;     yl = -h;
1870   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1871   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1872   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1873   PetscFunctionReturn(0);
1874 }
1875 
1876 #undef __FUNCT__
1877 #define __FUNCT__ "MatView_SeqBAIJ"
1878 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1879 {
1880   PetscErrorCode ierr;
1881   PetscTruth     iascii,isbinary,isdraw;
1882 
1883   PetscFunctionBegin;
1884   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1885   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1886   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1887   if (iascii){
1888     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1889   } else if (isbinary) {
1890     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1891   } else if (isdraw) {
1892     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1893   } else {
1894     Mat B;
1895     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1896     ierr = MatView(B,viewer);CHKERRQ(ierr);
1897     ierr = MatDestroy(B);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1905 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1906 {
1907   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1908   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1909   PetscInt    *ai = a->i,*ailen = a->ilen;
1910   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1911   MatScalar   *ap,*aa = a->a;
1912 
1913   PetscFunctionBegin;
1914   for (k=0; k<m; k++) { /* loop over rows */
1915     row  = im[k]; brow = row/bs;
1916     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1917     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1918     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1919     nrow = ailen[brow];
1920     for (l=0; l<n; l++) { /* loop over columns */
1921       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1922       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1923       col  = in[l] ;
1924       bcol = col/bs;
1925       cidx = col%bs;
1926       ridx = row%bs;
1927       high = nrow;
1928       low  = 0; /* assume unsorted */
1929       while (high-low > 5) {
1930         t = (low+high)/2;
1931         if (rp[t] > bcol) high = t;
1932         else             low  = t;
1933       }
1934       for (i=low; i<high; i++) {
1935         if (rp[i] > bcol) break;
1936         if (rp[i] == bcol) {
1937           *v++ = ap[bs2*i+bs*cidx+ridx];
1938           goto finished;
1939         }
1940       }
1941       *v++ = 0.0;
1942       finished:;
1943     }
1944   }
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1950 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1951 {
1952   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1953   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1954   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1955   PetscErrorCode    ierr;
1956   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1957   PetscTruth        roworiented=a->roworiented;
1958   const PetscScalar *value = v;
1959   MatScalar         *ap,*aa = a->a,*bap;
1960 
1961   PetscFunctionBegin;
1962   if (roworiented) {
1963     stepval = (n-1)*bs;
1964   } else {
1965     stepval = (m-1)*bs;
1966   }
1967   for (k=0; k<m; k++) { /* loop over added rows */
1968     row  = im[k];
1969     if (row < 0) continue;
1970 #if defined(PETSC_USE_DEBUG)
1971     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1972 #endif
1973     rp   = aj + ai[row];
1974     ap   = aa + bs2*ai[row];
1975     rmax = imax[row];
1976     nrow = ailen[row];
1977     low  = 0;
1978     high = nrow;
1979     for (l=0; l<n; l++) { /* loop over added columns */
1980       if (in[l] < 0) continue;
1981 #if defined(PETSC_USE_DEBUG)
1982       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1983 #endif
1984       col = in[l];
1985       if (roworiented) {
1986         value = v + (k*(stepval+bs) + l)*bs;
1987       } else {
1988         value = v + (l*(stepval+bs) + k)*bs;
1989       }
1990       if (col <= lastcol) low = 0; else high = nrow;
1991       lastcol = col;
1992       while (high-low > 7) {
1993         t = (low+high)/2;
1994         if (rp[t] > col) high = t;
1995         else             low  = t;
1996       }
1997       for (i=low; i<high; i++) {
1998         if (rp[i] > col) break;
1999         if (rp[i] == col) {
2000           bap  = ap +  bs2*i;
2001           if (roworiented) {
2002             if (is == ADD_VALUES) {
2003               for (ii=0; ii<bs; ii++,value+=stepval) {
2004                 for (jj=ii; jj<bs2; jj+=bs) {
2005                   bap[jj] += *value++;
2006                 }
2007               }
2008             } else {
2009               for (ii=0; ii<bs; ii++,value+=stepval) {
2010                 for (jj=ii; jj<bs2; jj+=bs) {
2011                   bap[jj] = *value++;
2012                 }
2013               }
2014             }
2015           } else {
2016             if (is == ADD_VALUES) {
2017               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2018                 for (jj=0; jj<bs; jj++) {
2019                   bap[jj] += value[jj];
2020                 }
2021                 bap += bs;
2022               }
2023             } else {
2024               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2025                 for (jj=0; jj<bs; jj++) {
2026                   bap[jj]  = value[jj];
2027                 }
2028                 bap += bs;
2029               }
2030             }
2031           }
2032           goto noinsert2;
2033         }
2034       }
2035       if (nonew == 1) goto noinsert2;
2036       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2037       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2038       N = nrow++ - 1; high++;
2039       /* shift up all the later entries in this row */
2040       for (ii=N; ii>=i; ii--) {
2041         rp[ii+1] = rp[ii];
2042         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2043       }
2044       if (N >= i) {
2045         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2046       }
2047       rp[i] = col;
2048       bap   = ap +  bs2*i;
2049       if (roworiented) {
2050         for (ii=0; ii<bs; ii++,value+=stepval) {
2051           for (jj=ii; jj<bs2; jj+=bs) {
2052             bap[jj] = *value++;
2053           }
2054         }
2055       } else {
2056         for (ii=0; ii<bs; ii++,value+=stepval) {
2057           for (jj=0; jj<bs; jj++) {
2058             *bap++  = *value++;
2059           }
2060         }
2061       }
2062       noinsert2:;
2063       low = i;
2064     }
2065     ailen[row] = nrow;
2066   }
2067   PetscFunctionReturn(0);
2068 }
2069 
2070 #undef __FUNCT__
2071 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2072 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2073 {
2074   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2075   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2076   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2077   PetscErrorCode ierr;
2078   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2079   MatScalar      *aa = a->a,*ap;
2080   PetscReal      ratio=0.6;
2081 
2082   PetscFunctionBegin;
2083   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2084 
2085   if (m) rmax = ailen[0];
2086   for (i=1; i<mbs; i++) {
2087     /* move each row back by the amount of empty slots (fshift) before it*/
2088     fshift += imax[i-1] - ailen[i-1];
2089     rmax   = PetscMax(rmax,ailen[i]);
2090     if (fshift) {
2091       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2092       N = ailen[i];
2093       for (j=0; j<N; j++) {
2094         ip[j-fshift] = ip[j];
2095         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2096       }
2097     }
2098     ai[i] = ai[i-1] + ailen[i-1];
2099   }
2100   if (mbs) {
2101     fshift += imax[mbs-1] - ailen[mbs-1];
2102     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2103   }
2104   /* reset ilen and imax for each row */
2105   for (i=0; i<mbs; i++) {
2106     ailen[i] = imax[i] = ai[i+1] - ai[i];
2107   }
2108   a->nz = ai[mbs];
2109 
2110   /* diagonals may have moved, so kill the diagonal pointers */
2111   a->idiagvalid = PETSC_FALSE;
2112   if (fshift && a->diag) {
2113     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2114     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2115     a->diag = 0;
2116   }
2117   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2118   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2119   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2121   A->info.mallocs     += a->reallocs;
2122   a->reallocs          = 0;
2123   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2124 
2125   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
2126   if (a->compressedrow.use){
2127     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2128   }
2129 
2130   A->same_nonzero = PETSC_TRUE;
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 /*
2135    This function returns an array of flags which indicate the locations of contiguous
2136    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2137    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2138    Assume: sizes should be long enough to hold all the values.
2139 */
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2142 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2143 {
2144   PetscInt   i,j,k,row;
2145   PetscTruth flg;
2146 
2147   PetscFunctionBegin;
2148   for (i=0,j=0; i<n; j++) {
2149     row = idx[i];
2150     if (row%bs!=0) { /* Not the begining of a block */
2151       sizes[j] = 1;
2152       i++;
2153     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2154       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2155       i++;
2156     } else { /* Begining of the block, so check if the complete block exists */
2157       flg = PETSC_TRUE;
2158       for (k=1; k<bs; k++) {
2159         if (row+k != idx[i+k]) { /* break in the block */
2160           flg = PETSC_FALSE;
2161           break;
2162         }
2163       }
2164       if (flg) { /* No break in the bs */
2165         sizes[j] = bs;
2166         i+= bs;
2167       } else {
2168         sizes[j] = 1;
2169         i++;
2170       }
2171     }
2172   }
2173   *bs_max = j;
2174   PetscFunctionReturn(0);
2175 }
2176 
2177 #undef __FUNCT__
2178 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2179 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2180 {
2181   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2182   PetscErrorCode ierr;
2183   PetscInt       i,j,k,count,*rows;
2184   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2185   PetscScalar    zero = 0.0;
2186   MatScalar      *aa;
2187 
2188   PetscFunctionBegin;
2189   /* Make a copy of the IS and  sort it */
2190   /* allocate memory for rows,sizes */
2191   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2192 
2193   /* copy IS values to rows, and sort them */
2194   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2195   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2196   if (baij->keepnonzeropattern) {
2197     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2198     bs_max = is_n;
2199     A->same_nonzero = PETSC_TRUE;
2200   } else {
2201     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2202     A->same_nonzero = PETSC_FALSE;
2203   }
2204 
2205   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2206     row   = rows[j];
2207     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2208     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2209     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2210     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2211       if (diag != 0.0) {
2212         if (baij->ilen[row/bs] > 0) {
2213           baij->ilen[row/bs]       = 1;
2214           baij->j[baij->i[row/bs]] = row/bs;
2215           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2216         }
2217         /* Now insert all the diagonal values for this bs */
2218         for (k=0; k<bs; k++) {
2219           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2220         }
2221       } else { /* (diag == 0.0) */
2222         baij->ilen[row/bs] = 0;
2223       } /* end (diag == 0.0) */
2224     } else { /* (sizes[i] != bs) */
2225 #if defined (PETSC_USE_DEBUG)
2226       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2227 #endif
2228       for (k=0; k<count; k++) {
2229         aa[0] =  zero;
2230         aa    += bs;
2231       }
2232       if (diag != 0.0) {
2233         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2234       }
2235     }
2236   }
2237 
2238   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2239   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2240   PetscFunctionReturn(0);
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2245 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2246 {
2247   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2248   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2249   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2250   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2251   PetscErrorCode ierr;
2252   PetscInt       ridx,cidx,bs2=a->bs2;
2253   PetscTruth     roworiented=a->roworiented;
2254   MatScalar      *ap,value,*aa=a->a,*bap;
2255 
2256   PetscFunctionBegin;
2257   if (v) PetscValidScalarPointer(v,6);
2258   for (k=0; k<m; k++) { /* loop over added rows */
2259     row  = im[k];
2260     brow = row/bs;
2261     if (row < 0) continue;
2262 #if defined(PETSC_USE_DEBUG)
2263     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2264 #endif
2265     rp   = aj + ai[brow];
2266     ap   = aa + bs2*ai[brow];
2267     rmax = imax[brow];
2268     nrow = ailen[brow];
2269     low  = 0;
2270     high = nrow;
2271     for (l=0; l<n; l++) { /* loop over added columns */
2272       if (in[l] < 0) continue;
2273 #if defined(PETSC_USE_DEBUG)
2274       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2275 #endif
2276       col = in[l]; bcol = col/bs;
2277       ridx = row % bs; cidx = col % bs;
2278       if (roworiented) {
2279         value = v[l + k*n];
2280       } else {
2281         value = v[k + l*m];
2282       }
2283       if (col <= lastcol) low = 0; else high = nrow;
2284       lastcol = col;
2285       while (high-low > 7) {
2286         t = (low+high)/2;
2287         if (rp[t] > bcol) high = t;
2288         else              low  = t;
2289       }
2290       for (i=low; i<high; i++) {
2291         if (rp[i] > bcol) break;
2292         if (rp[i] == bcol) {
2293           bap  = ap +  bs2*i + bs*cidx + ridx;
2294           if (is == ADD_VALUES) *bap += value;
2295           else                  *bap  = value;
2296           goto noinsert1;
2297         }
2298       }
2299       if (nonew == 1) goto noinsert1;
2300       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2301       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2302       N = nrow++ - 1; high++;
2303       /* shift up all the later entries in this row */
2304       for (ii=N; ii>=i; ii--) {
2305         rp[ii+1] = rp[ii];
2306         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2307       }
2308       if (N>=i) {
2309         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2310       }
2311       rp[i]                      = bcol;
2312       ap[bs2*i + bs*cidx + ridx] = value;
2313       a->nz++;
2314       noinsert1:;
2315       low = i;
2316     }
2317     ailen[brow] = nrow;
2318   }
2319   A->same_nonzero = PETSC_FALSE;
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2325 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2326 {
2327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2328   Mat            outA;
2329   PetscErrorCode ierr;
2330   PetscTruth     row_identity,col_identity;
2331 
2332   PetscFunctionBegin;
2333   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2334   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2335   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2336   if (!row_identity || !col_identity) {
2337     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2338   }
2339 
2340   outA            = inA;
2341   inA->factortype = MAT_FACTOR_LU;
2342 
2343   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2344 
2345   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2346   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2347   a->row = row;
2348   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2349   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2350   a->col = col;
2351 
2352   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2353   if (a->icol) {
2354     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2355   }
2356   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2357   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2358 
2359   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2360   if (!a->solve_work) {
2361     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2362     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2363   }
2364   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2365 
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 EXTERN_C_BEGIN
2370 #undef __FUNCT__
2371 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2372 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2373 {
2374   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2375   PetscInt    i,nz,mbs;
2376 
2377   PetscFunctionBegin;
2378   nz  = baij->maxnz/baij->bs2;
2379   mbs = baij->mbs;
2380   for (i=0; i<nz; i++) {
2381     baij->j[i] = indices[i];
2382   }
2383   baij->nz = nz;
2384   for (i=0; i<mbs; i++) {
2385     baij->ilen[i] = baij->imax[i];
2386   }
2387   PetscFunctionReturn(0);
2388 }
2389 EXTERN_C_END
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2393 /*@
2394     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2395        in the matrix.
2396 
2397   Input Parameters:
2398 +  mat - the SeqBAIJ matrix
2399 -  indices - the column indices
2400 
2401   Level: advanced
2402 
2403   Notes:
2404     This can be called if you have precomputed the nonzero structure of the
2405   matrix and want to provide it to the matrix object to improve the performance
2406   of the MatSetValues() operation.
2407 
2408     You MUST have set the correct numbers of nonzeros per row in the call to
2409   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2410 
2411     MUST be called before any calls to MatSetValues();
2412 
2413 @*/
2414 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2415 {
2416   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2417 
2418   PetscFunctionBegin;
2419   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2420   PetscValidPointer(indices,2);
2421   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2422   if (f) {
2423     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2424   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2425   PetscFunctionReturn(0);
2426 }
2427 
2428 #undef __FUNCT__
2429 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2430 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2431 {
2432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2433   PetscErrorCode ierr;
2434   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2435   PetscReal      atmp;
2436   PetscScalar    *x,zero = 0.0;
2437   MatScalar      *aa;
2438   PetscInt       ncols,brow,krow,kcol;
2439 
2440   PetscFunctionBegin;
2441   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2442   bs   = A->rmap->bs;
2443   aa   = a->a;
2444   ai   = a->i;
2445   aj   = a->j;
2446   mbs  = a->mbs;
2447 
2448   ierr = VecSet(v,zero);CHKERRQ(ierr);
2449   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2450   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2451   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2452   for (i=0; i<mbs; i++) {
2453     ncols = ai[1] - ai[0]; ai++;
2454     brow  = bs*i;
2455     for (j=0; j<ncols; j++){
2456       for (kcol=0; kcol<bs; kcol++){
2457         for (krow=0; krow<bs; krow++){
2458           atmp = PetscAbsScalar(*aa);aa++;
2459           row = brow + krow;    /* row index */
2460           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2461           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2462         }
2463       }
2464       aj++;
2465     }
2466   }
2467   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatCopy_SeqBAIJ"
2473 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2474 {
2475   PetscErrorCode ierr;
2476 
2477   PetscFunctionBegin;
2478   /* If the two matrices have the same copy implementation, use fast copy. */
2479   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2480     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2481     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2482 
2483     if (a->i[A->rmap->N] != b->i[B->rmap->N]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2484     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2485   } else {
2486     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2487   }
2488   PetscFunctionReturn(0);
2489 }
2490 
2491 #undef __FUNCT__
2492 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2493 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2494 {
2495   PetscErrorCode ierr;
2496 
2497   PetscFunctionBegin;
2498   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2504 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2505 {
2506   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2507   PetscFunctionBegin;
2508   *array = a->a;
2509   PetscFunctionReturn(0);
2510 }
2511 
2512 #undef __FUNCT__
2513 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2514 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2515 {
2516   PetscFunctionBegin;
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 #undef __FUNCT__
2521 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2522 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2523 {
2524   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2525   PetscErrorCode ierr;
2526   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2527   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2528 
2529   PetscFunctionBegin;
2530   if (str == SAME_NONZERO_PATTERN) {
2531     PetscScalar alpha = a;
2532     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2533   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2534     if (y->xtoy && y->XtoY != X) {
2535       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2536       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2537     }
2538     if (!y->xtoy) { /* get xtoy */
2539       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2540       y->XtoY = X;
2541       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2542     }
2543     bs2 = bs*bs;
2544     for (i=0; i<x->nz; i++) {
2545       j = 0;
2546       while (j < bs2){
2547         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2548         j++;
2549       }
2550     }
2551     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2552   } else {
2553     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2554   }
2555   PetscFunctionReturn(0);
2556 }
2557 
2558 #undef __FUNCT__
2559 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2560 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2561 {
2562   PetscInt rbs,cbs;
2563   PetscErrorCode ierr;
2564 
2565   PetscFunctionBegin;
2566   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2567   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2568   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2569   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2570   PetscFunctionReturn(0);
2571 }
2572 
2573 #undef __FUNCT__
2574 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2575 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2576 {
2577   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2578   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2579   MatScalar      *aa = a->a;
2580 
2581   PetscFunctionBegin;
2582   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2588 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2589 {
2590   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2591   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2592   MatScalar      *aa = a->a;
2593 
2594   PetscFunctionBegin;
2595   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2600 
2601 #undef __FUNCT__
2602 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2603 /*
2604     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2605 */
2606 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2607 {
2608   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2609   PetscErrorCode ierr;
2610   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2611   PetscInt       nz = a->i[m],row,*jj,mr,col;
2612 
2613   PetscFunctionBegin;
2614   *nn = n;
2615   if (!ia) PetscFunctionReturn(0);
2616   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2617   else {
2618     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2619     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2620     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2621     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2622     jj = a->j;
2623     for (i=0; i<nz; i++) {
2624       collengths[jj[i]]++;
2625     }
2626     cia[0] = oshift;
2627     for (i=0; i<n; i++) {
2628       cia[i+1] = cia[i] + collengths[i];
2629     }
2630     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2631     jj   = a->j;
2632     for (row=0; row<m; row++) {
2633       mr = a->i[row+1] - a->i[row];
2634       for (i=0; i<mr; i++) {
2635         col = *jj++;
2636         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2637       }
2638     }
2639     ierr = PetscFree(collengths);CHKERRQ(ierr);
2640     *ia = cia; *ja = cja;
2641   }
2642   PetscFunctionReturn(0);
2643 }
2644 
2645 #undef __FUNCT__
2646 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2647 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2648 {
2649   PetscErrorCode ierr;
2650 
2651   PetscFunctionBegin;
2652   if (!ia) PetscFunctionReturn(0);
2653   ierr = PetscFree(*ia);CHKERRQ(ierr);
2654   ierr = PetscFree(*ja);CHKERRQ(ierr);
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 #undef __FUNCT__
2659 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2660 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2661 {
2662   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2663   PetscErrorCode ierr;
2664   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2665   PetscScalar    dx,*y,*xx,*w3_array;
2666   PetscScalar    *vscale_array;
2667   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2668   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2669   void           *fctx = coloring->fctx;
2670   PetscTruth     flg = PETSC_FALSE;
2671   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2672   Vec            x1_tmp;
2673 
2674   PetscFunctionBegin;
2675   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2676   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2677   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2678   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2679 
2680   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2681   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2682   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2683   if (flg) {
2684     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2685   } else {
2686     PetscTruth assembled;
2687     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2688     if (assembled) {
2689       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2690     }
2691   }
2692 
2693   x1_tmp = x1;
2694   if (!coloring->vscale){
2695     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2696   }
2697 
2698   /*
2699     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2700     coloring->F for the coarser grids from the finest
2701   */
2702   if (coloring->F) {
2703     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2704     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2705     if (m1 != m2) {
2706       coloring->F = 0;
2707       }
2708     }
2709 
2710   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2711     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2712   }
2713   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2714 
2715   /* Set w1 = F(x1) */
2716   if (coloring->F) {
2717     w1          = coloring->F; /* use already computed value of function */
2718     coloring->F = 0;
2719   } else {
2720     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2721     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2722     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2723   }
2724 
2725   if (!coloring->w3) {
2726     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2727     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2728   }
2729   w3 = coloring->w3;
2730 
2731     CHKMEMQ;
2732     /* Compute all the local scale factors, including ghost points */
2733   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2734   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2735   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2736   if (ctype == IS_COLORING_GHOSTED){
2737     col_start = 0; col_end = N;
2738   } else if (ctype == IS_COLORING_GLOBAL){
2739     xx = xx - start;
2740     vscale_array = vscale_array - start;
2741     col_start = start; col_end = N + start;
2742   }    CHKMEMQ;
2743   for (col=col_start; col<col_end; col++){
2744     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2745     if (coloring->htype[0] == 'w') {
2746       dx = 1.0 + unorm;
2747     } else {
2748       dx  = xx[col];
2749     }
2750     if (dx == 0.0) dx = 1.0;
2751 #if !defined(PETSC_USE_COMPLEX)
2752     if (dx < umin && dx >= 0.0)      dx = umin;
2753     else if (dx < 0.0 && dx > -umin) dx = -umin;
2754 #else
2755     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2756     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2757 #endif
2758     dx               *= epsilon;
2759     vscale_array[col] = 1.0/dx;
2760   }     CHKMEMQ;
2761   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2762   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2763   if (ctype == IS_COLORING_GLOBAL){
2764     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2765     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2766   }
2767   CHKMEMQ;
2768   if (coloring->vscaleforrow) {
2769     vscaleforrow = coloring->vscaleforrow;
2770   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2771 
2772   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2773   /*
2774     Loop over each color
2775   */
2776   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2777   for (k=0; k<coloring->ncolors; k++) {
2778     coloring->currentcolor = k;
2779     for (i=0; i<bs; i++) {
2780       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2781       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2782       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2783       /*
2784 	Loop over each column associated with color
2785 	adding the perturbation to the vector w3.
2786       */
2787       for (l=0; l<coloring->ncolumns[k]; l++) {
2788 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2789 	if (coloring->htype[0] == 'w') {
2790 	  dx = 1.0 + unorm;
2791 	} else {
2792 	  dx  = xx[col];
2793 	}
2794 	if (dx == 0.0) dx = 1.0;
2795 #if !defined(PETSC_USE_COMPLEX)
2796 	if (dx < umin && dx >= 0.0)      dx = umin;
2797 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2798 #else
2799 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2800 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2801 #endif
2802 	dx            *= epsilon;
2803 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2804 	w3_array[col] += dx;
2805       }
2806       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2807       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2808 
2809       /*
2810 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2811 	w2 = F(x1 + dx) - F(x1)
2812       */
2813       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2814       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2815       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2816       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2817 
2818       /*
2819 	Loop over rows of vector, putting results into Jacobian matrix
2820       */
2821       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2822       for (l=0; l<coloring->nrows[k]; l++) {
2823 	row    = bs*coloring->rows[k][l];             /* local row index */
2824 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2825         for (j=0; j<bs; j++) {
2826   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2827           srows[j]  = row + start + j;
2828         }
2829 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2830       }
2831       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2832     }
2833   } /* endof for each color */
2834   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2835   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2836   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2837   ierr = PetscFree(srows);CHKERRQ(ierr);
2838 
2839   coloring->currentcolor = -1;
2840   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2843   PetscFunctionReturn(0);
2844 }
2845 
2846 /* -------------------------------------------------------------------*/
2847 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2848        MatGetRow_SeqBAIJ,
2849        MatRestoreRow_SeqBAIJ,
2850        MatMult_SeqBAIJ_N,
2851 /* 4*/ MatMultAdd_SeqBAIJ_N,
2852        MatMultTranspose_SeqBAIJ,
2853        MatMultTransposeAdd_SeqBAIJ,
2854        0,
2855        0,
2856        0,
2857 /*10*/ 0,
2858        MatLUFactor_SeqBAIJ,
2859        0,
2860        0,
2861        MatTranspose_SeqBAIJ,
2862 /*15*/ MatGetInfo_SeqBAIJ,
2863        MatEqual_SeqBAIJ,
2864        MatGetDiagonal_SeqBAIJ,
2865        MatDiagonalScale_SeqBAIJ,
2866        MatNorm_SeqBAIJ,
2867 /*20*/ 0,
2868        MatAssemblyEnd_SeqBAIJ,
2869        MatSetOption_SeqBAIJ,
2870        MatZeroEntries_SeqBAIJ,
2871 /*24*/ MatZeroRows_SeqBAIJ,
2872        0,
2873        0,
2874        0,
2875        0,
2876 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2877        0,
2878        0,
2879        MatGetArray_SeqBAIJ,
2880        MatRestoreArray_SeqBAIJ,
2881 /*34*/ MatDuplicate_SeqBAIJ,
2882        0,
2883        0,
2884        MatILUFactor_SeqBAIJ,
2885        0,
2886 /*39*/ MatAXPY_SeqBAIJ,
2887        MatGetSubMatrices_SeqBAIJ,
2888        MatIncreaseOverlap_SeqBAIJ,
2889        MatGetValues_SeqBAIJ,
2890        MatCopy_SeqBAIJ,
2891 /*44*/ 0,
2892        MatScale_SeqBAIJ,
2893        0,
2894        0,
2895        0,
2896 /*49*/ MatSetBlockSize_SeqBAIJ,
2897        MatGetRowIJ_SeqBAIJ,
2898        MatRestoreRowIJ_SeqBAIJ,
2899        MatGetColumnIJ_SeqBAIJ,
2900        MatRestoreColumnIJ_SeqBAIJ,
2901 /*54*/ MatFDColoringCreate_SeqAIJ,
2902        0,
2903        0,
2904        0,
2905        MatSetValuesBlocked_SeqBAIJ,
2906 /*59*/ MatGetSubMatrix_SeqBAIJ,
2907        MatDestroy_SeqBAIJ,
2908        MatView_SeqBAIJ,
2909        0,
2910        0,
2911 /*64*/ 0,
2912        0,
2913        0,
2914        0,
2915        0,
2916 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2917        0,
2918        MatConvert_Basic,
2919        0,
2920        0,
2921 /*74*/ 0,
2922        MatFDColoringApply_BAIJ,
2923        0,
2924        0,
2925        0,
2926 /*79*/ 0,
2927        0,
2928        0,
2929        0,
2930        MatLoad_SeqBAIJ,
2931 /*84*/ 0,
2932        0,
2933        0,
2934        0,
2935        0,
2936 /*89*/ 0,
2937        0,
2938        0,
2939        0,
2940        0,
2941 /*94*/ 0,
2942        0,
2943        0,
2944        0,
2945        0,
2946 /*99*/0,
2947        0,
2948        0,
2949        0,
2950        0,
2951 /*104*/0,
2952        MatRealPart_SeqBAIJ,
2953        MatImaginaryPart_SeqBAIJ,
2954        0,
2955        0,
2956 /*109*/0,
2957        0,
2958        0,
2959        0,
2960        MatMissingDiagonal_SeqBAIJ,
2961 /*114*/0,
2962        0,
2963        0,
2964        0,
2965        0,
2966 /*119*/0,
2967        0,
2968        MatMultHermitianTranspose_SeqBAIJ,
2969        MatMultHermitianTransposeAdd_SeqBAIJ
2970 };
2971 
2972 EXTERN_C_BEGIN
2973 #undef __FUNCT__
2974 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2975 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2976 {
2977   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2978   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2979   PetscErrorCode ierr;
2980 
2981   PetscFunctionBegin;
2982   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2983 
2984   /* allocate space for values if not already there */
2985   if (!aij->saved_values) {
2986     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2987     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2988   }
2989 
2990   /* copy values over */
2991   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2992   PetscFunctionReturn(0);
2993 }
2994 EXTERN_C_END
2995 
2996 EXTERN_C_BEGIN
2997 #undef __FUNCT__
2998 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2999 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
3000 {
3001   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3002   PetscErrorCode ierr;
3003   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
3004 
3005   PetscFunctionBegin;
3006   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3007   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3008 
3009   /* copy values over */
3010   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3011   PetscFunctionReturn(0);
3012 }
3013 EXTERN_C_END
3014 
3015 EXTERN_C_BEGIN
3016 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3017 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3018 EXTERN_C_END
3019 
3020 EXTERN_C_BEGIN
3021 #undef __FUNCT__
3022 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3023 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3024 {
3025   Mat_SeqBAIJ    *b;
3026   PetscErrorCode ierr;
3027   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
3028   PetscTruth     flg,skipallocation = PETSC_FALSE;
3029 
3030   PetscFunctionBegin;
3031 
3032   if (nz == MAT_SKIP_ALLOCATION) {
3033     skipallocation = PETSC_TRUE;
3034     nz             = 0;
3035   }
3036 
3037   if (bs < 0) {
3038     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
3039       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
3040     ierr = PetscOptionsEnd();CHKERRQ(ierr);
3041     bs   = PetscAbs(bs);
3042   }
3043   if (nnz && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
3044   bs   = newbs;
3045 
3046   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3047   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3048   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3049   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3050 
3051   B->preallocated = PETSC_TRUE;
3052 
3053   mbs  = B->rmap->n/bs;
3054   nbs  = B->cmap->n/bs;
3055   bs2  = bs*bs;
3056 
3057   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3058 
3059   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3060   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3061   if (nnz) {
3062     for (i=0; i<mbs; i++) {
3063       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3064       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3065     }
3066   }
3067 
3068   b       = (Mat_SeqBAIJ*)B->data;
3069   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3070     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3071   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3072 
3073   if (!flg) {
3074     switch (bs) {
3075     case 1:
3076       B->ops->mult            = MatMult_SeqBAIJ_1;
3077       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3078       B->ops->sor             = MatSOR_SeqBAIJ_1;
3079       break;
3080     case 2:
3081       B->ops->mult            = MatMult_SeqBAIJ_2;
3082       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3083       B->ops->sor             = MatSOR_SeqBAIJ_2;
3084       break;
3085     case 3:
3086       B->ops->mult            = MatMult_SeqBAIJ_3;
3087       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3088       B->ops->sor             = MatSOR_SeqBAIJ_3;
3089       break;
3090     case 4:
3091       B->ops->mult            = MatMult_SeqBAIJ_4;
3092       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3093       B->ops->sor             = MatSOR_SeqBAIJ_4;
3094       break;
3095     case 5:
3096       B->ops->mult            = MatMult_SeqBAIJ_5;
3097       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3098       B->ops->sor             = MatSOR_SeqBAIJ_5;
3099       break;
3100     case 6:
3101       B->ops->mult            = MatMult_SeqBAIJ_6;
3102       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3103       B->ops->sor             = MatSOR_SeqBAIJ_6;
3104       break;
3105     case 7:
3106       B->ops->mult            = MatMult_SeqBAIJ_7;
3107       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3108       B->ops->sor             = MatSOR_SeqBAIJ_7;
3109       break;
3110     case 15:
3111       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3112       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3113       B->ops->sor             = MatSOR_SeqBAIJ_N;
3114       break;
3115     default:
3116       B->ops->mult            = MatMult_SeqBAIJ_N;
3117       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3118       B->ops->sor             = MatSOR_SeqBAIJ_N;
3119       break;
3120     }
3121   }
3122   B->rmap->bs  = bs;
3123   b->mbs       = mbs;
3124   b->nbs       = nbs;
3125   if (!skipallocation) {
3126     if (!b->imax) {
3127       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3128       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
3129       b->free_imax_ilen = PETSC_TRUE;
3130     }
3131     /* b->ilen will count nonzeros in each block row so far. */
3132     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3133     if (!nnz) {
3134       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3135       else if (nz <= 0)        nz = 1;
3136       for (i=0; i<mbs; i++) b->imax[i] = nz;
3137       nz = nz*mbs;
3138     } else {
3139       nz = 0;
3140       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3141     }
3142 
3143     /* allocate the matrix space */
3144     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3145     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3146     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3147     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3148     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3149     b->singlemalloc = PETSC_TRUE;
3150     b->i[0] = 0;
3151     for (i=1; i<mbs+1; i++) {
3152       b->i[i] = b->i[i-1] + b->imax[i-1];
3153     }
3154     b->free_a     = PETSC_TRUE;
3155     b->free_ij    = PETSC_TRUE;
3156   } else {
3157     b->free_a     = PETSC_FALSE;
3158     b->free_ij    = PETSC_FALSE;
3159   }
3160 
3161   B->rmap->bs          = bs;
3162   b->bs2              = bs2;
3163   b->mbs              = mbs;
3164   b->nz               = 0;
3165   b->maxnz            = nz*bs2;
3166   B->info.nz_unneeded = (PetscReal)b->maxnz;
3167   PetscFunctionReturn(0);
3168 }
3169 EXTERN_C_END
3170 
3171 EXTERN_C_BEGIN
3172 #undef __FUNCT__
3173 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3174 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3175 {
3176   PetscInt       i,m,nz,nz_max=0,*nnz;
3177   PetscScalar    *values=0;
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3182   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3183   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3184   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3185   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3186   m = B->rmap->n/bs;
3187 
3188   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3189   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3190   for(i=0; i<m; i++) {
3191     nz = ii[i+1]- ii[i];
3192     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3193     nz_max = PetscMax(nz_max, nz);
3194     nnz[i] = nz;
3195   }
3196   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3197   ierr = PetscFree(nnz);CHKERRQ(ierr);
3198 
3199   values = (PetscScalar*)V;
3200   if (!values) {
3201     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3202     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3203   }
3204   for (i=0; i<m; i++) {
3205     PetscInt          ncols  = ii[i+1] - ii[i];
3206     const PetscInt    *icols = jj + ii[i];
3207     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3208     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3209   }
3210   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3211   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3212   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3213 
3214   PetscFunctionReturn(0);
3215 }
3216 EXTERN_C_END
3217 
3218 
3219 EXTERN_C_BEGIN
3220 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3221 #if defined(PETSC_HAVE_MUMPS)
3222 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3223 #endif
3224 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3225 EXTERN_C_END
3226 
3227 /*MC
3228    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3229    block sparse compressed row format.
3230 
3231    Options Database Keys:
3232 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3233 
3234   Level: beginner
3235 
3236 .seealso: MatCreateSeqBAIJ()
3237 M*/
3238 
3239 
3240 EXTERN_C_BEGIN
3241 #undef __FUNCT__
3242 #define __FUNCT__ "MatCreate_SeqBAIJ"
3243 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3244 {
3245   PetscErrorCode ierr;
3246   PetscMPIInt    size;
3247   Mat_SeqBAIJ    *b;
3248 
3249   PetscFunctionBegin;
3250   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3251   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3252 
3253   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3254   B->data = (void*)b;
3255   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3256   B->mapping               = 0;
3257   b->row                   = 0;
3258   b->col                   = 0;
3259   b->icol                  = 0;
3260   b->reallocs              = 0;
3261   b->saved_values          = 0;
3262 
3263   b->roworiented           = PETSC_TRUE;
3264   b->nonew                 = 0;
3265   b->diag                  = 0;
3266   b->solve_work            = 0;
3267   b->mult_work             = 0;
3268   B->spptr                 = 0;
3269   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3270   b->keepnonzeropattern    = PETSC_FALSE;
3271   b->xtoy                  = 0;
3272   b->XtoY                  = 0;
3273   b->compressedrow.use     = PETSC_FALSE;
3274   b->compressedrow.nrows   = 0;
3275   b->compressedrow.i       = PETSC_NULL;
3276   b->compressedrow.rindex  = PETSC_NULL;
3277   b->compressedrow.checked = PETSC_FALSE;
3278   B->same_nonzero          = PETSC_FALSE;
3279 
3280   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3281                                      "MatGetFactorAvailable_seqbaij_petsc",
3282                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3283   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3284                                      "MatGetFactor_seqbaij_petsc",
3285                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3286 #if defined(PETSC_HAVE_MUMPS)
3287   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3288 #endif
3289   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3290                                      "MatInvertBlockDiagonal_SeqBAIJ",
3291                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3292   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3293                                      "MatStoreValues_SeqBAIJ",
3294                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3295   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3296                                      "MatRetrieveValues_SeqBAIJ",
3297                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3298   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3299                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3300                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3301   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3302                                      "MatConvert_SeqBAIJ_SeqAIJ",
3303                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3304   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3305                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3306                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3307   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3308                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3309                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3310   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3311                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3312                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3313   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3314   PetscFunctionReturn(0);
3315 }
3316 EXTERN_C_END
3317 
3318 #undef __FUNCT__
3319 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3320 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3321 {
3322   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3323   PetscErrorCode ierr;
3324   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3325 
3326   PetscFunctionBegin;
3327   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3328 
3329   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3330     c->imax = a->imax;
3331     c->ilen = a->ilen;
3332     c->free_imax_ilen = PETSC_FALSE;
3333   } else {
3334     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3335     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3336     for (i=0; i<mbs; i++) {
3337       c->imax[i] = a->imax[i];
3338       c->ilen[i] = a->ilen[i];
3339     }
3340     c->free_imax_ilen = PETSC_TRUE;
3341   }
3342 
3343   /* allocate the matrix space */
3344   if (mallocmatspace){
3345     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3346       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3347       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3348       c->singlemalloc = PETSC_FALSE;
3349       c->free_ij      = PETSC_FALSE;
3350       c->i            = a->i;
3351       c->j            = a->j;
3352       c->parent       = A;
3353       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3354       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3355       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3356     } else {
3357       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3358       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3359       c->singlemalloc = PETSC_TRUE;
3360       c->free_ij      = PETSC_TRUE;
3361       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3362       if (mbs > 0) {
3363 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3364 	if (cpvalues == MAT_COPY_VALUES) {
3365 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3366 	} else {
3367 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3368 	}
3369       }
3370     }
3371   }
3372 
3373   c->roworiented = a->roworiented;
3374   c->nonew       = a->nonew;
3375   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3376   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3377   c->bs2         = a->bs2;
3378   c->mbs         = a->mbs;
3379   c->nbs         = a->nbs;
3380 
3381   if (a->diag) {
3382     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3383       c->diag      = a->diag;
3384       c->free_diag = PETSC_FALSE;
3385     } else {
3386       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3387       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3388       for (i=0; i<mbs; i++) {
3389         c->diag[i] = a->diag[i];
3390       }
3391       c->free_diag = PETSC_TRUE;
3392     }
3393   } else c->diag        = 0;
3394   c->nz                 = a->nz;
3395   c->maxnz              = bs2*a->nz; /* Since we allocate exactly the right amount */
3396   c->solve_work         = 0;
3397   c->mult_work          = 0;
3398   c->free_a             = PETSC_TRUE;
3399   c->free_ij            = PETSC_TRUE;
3400   C->preallocated       = PETSC_TRUE;
3401   C->assembled          = PETSC_TRUE;
3402 
3403   c->compressedrow.use     = a->compressedrow.use;
3404   c->compressedrow.nrows   = a->compressedrow.nrows;
3405   c->compressedrow.checked = a->compressedrow.checked;
3406   if (a->compressedrow.checked && a->compressedrow.use){
3407     i = a->compressedrow.nrows;
3408     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3409     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3410     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3411     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3412   } else {
3413     c->compressedrow.use    = PETSC_FALSE;
3414     c->compressedrow.i      = PETSC_NULL;
3415     c->compressedrow.rindex = PETSC_NULL;
3416   }
3417   C->same_nonzero = A->same_nonzero;
3418   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3419   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3420   PetscFunctionReturn(0);
3421 }
3422 
3423 #undef __FUNCT__
3424 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3425 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3426 {
3427     PetscErrorCode ierr;
3428 
3429   PetscFunctionBegin;
3430   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3431   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3432   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3433   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3434   PetscFunctionReturn(0);
3435 }
3436 
3437 #undef __FUNCT__
3438 #define __FUNCT__ "MatLoad_SeqBAIJ"
3439 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3440 {
3441   Mat_SeqBAIJ    *a;
3442   PetscErrorCode ierr;
3443   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3444   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3445   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3446   PetscInt       *masked,nmask,tmp,bs2,ishift;
3447   PetscMPIInt    size;
3448   int            fd;
3449   PetscScalar    *aa;
3450   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3451 
3452   PetscFunctionBegin;
3453   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3454   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3455   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3456   bs2  = bs*bs;
3457 
3458   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3459   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3460   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3461   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3462   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3463   M = header[1]; N = header[2]; nz = header[3];
3464 
3465   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3466   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3467 
3468   /*
3469      This code adds extra rows to make sure the number of rows is
3470     divisible by the blocksize
3471   */
3472   mbs        = M/bs;
3473   extra_rows = bs - M + bs*(mbs);
3474   if (extra_rows == bs) extra_rows = 0;
3475   else                  mbs++;
3476   if (extra_rows) {
3477     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3478   }
3479 
3480   /* Set global sizes if not already set */
3481   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3482     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3483   } else { /* Check if the matrix global sizes are correct */
3484     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3485     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3486   }
3487 
3488   /* read in row lengths */
3489   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3490   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3491   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3492 
3493   /* read in column indices */
3494   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3495   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3496   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3497 
3498   /* loop over row lengths determining block row lengths */
3499   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3500   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3501   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3502   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3503   rowcount = 0;
3504   nzcount = 0;
3505   for (i=0; i<mbs; i++) {
3506     nmask = 0;
3507     for (j=0; j<bs; j++) {
3508       kmax = rowlengths[rowcount];
3509       for (k=0; k<kmax; k++) {
3510         tmp = jj[nzcount++]/bs;
3511         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3512       }
3513       rowcount++;
3514     }
3515     browlengths[i] += nmask;
3516     /* zero out the mask elements we set */
3517     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3518   }
3519 
3520   /* Do preallocation  */
3521   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3522   a = (Mat_SeqBAIJ*)newmat->data;
3523 
3524   /* set matrix "i" values */
3525   a->i[0] = 0;
3526   for (i=1; i<= mbs; i++) {
3527     a->i[i]      = a->i[i-1] + browlengths[i-1];
3528     a->ilen[i-1] = browlengths[i-1];
3529   }
3530   a->nz         = 0;
3531   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3532 
3533   /* read in nonzero values */
3534   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3535   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3536   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3537 
3538   /* set "a" and "j" values into matrix */
3539   nzcount = 0; jcount = 0;
3540   for (i=0; i<mbs; i++) {
3541     nzcountb = nzcount;
3542     nmask    = 0;
3543     for (j=0; j<bs; j++) {
3544       kmax = rowlengths[i*bs+j];
3545       for (k=0; k<kmax; k++) {
3546         tmp = jj[nzcount++]/bs;
3547 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3548       }
3549     }
3550     /* sort the masked values */
3551     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3552 
3553     /* set "j" values into matrix */
3554     maskcount = 1;
3555     for (j=0; j<nmask; j++) {
3556       a->j[jcount++]  = masked[j];
3557       mask[masked[j]] = maskcount++;
3558     }
3559     /* set "a" values into matrix */
3560     ishift = bs2*a->i[i];
3561     for (j=0; j<bs; j++) {
3562       kmax = rowlengths[i*bs+j];
3563       for (k=0; k<kmax; k++) {
3564         tmp       = jj[nzcountb]/bs ;
3565         block     = mask[tmp] - 1;
3566         point     = jj[nzcountb] - bs*tmp;
3567         idx       = ishift + bs2*block + j + bs*point;
3568         a->a[idx] = (MatScalar)aa[nzcountb++];
3569       }
3570     }
3571     /* zero out the mask elements we set */
3572     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3573   }
3574   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3575 
3576   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3577   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3578   ierr = PetscFree(aa);CHKERRQ(ierr);
3579   ierr = PetscFree(jj);CHKERRQ(ierr);
3580   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3581 
3582   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3583   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3584   ierr = MatView_Private(newmat);CHKERRQ(ierr);
3585   PetscFunctionReturn(0);
3586 }
3587 
3588 #undef __FUNCT__
3589 #define __FUNCT__ "MatCreateSeqBAIJ"
3590 /*@C
3591    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3592    compressed row) format.  For good matrix assembly performance the
3593    user should preallocate the matrix storage by setting the parameter nz
3594    (or the array nnz).  By setting these parameters accurately, performance
3595    during matrix assembly can be increased by more than a factor of 50.
3596 
3597    Collective on MPI_Comm
3598 
3599    Input Parameters:
3600 +  comm - MPI communicator, set to PETSC_COMM_SELF
3601 .  bs - size of block
3602 .  m - number of rows
3603 .  n - number of columns
3604 .  nz - number of nonzero blocks  per block row (same for all rows)
3605 -  nnz - array containing the number of nonzero blocks in the various block rows
3606          (possibly different for each block row) or PETSC_NULL
3607 
3608    Output Parameter:
3609 .  A - the matrix
3610 
3611    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3612    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3613    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3614 
3615    Options Database Keys:
3616 .   -mat_no_unroll - uses code that does not unroll the loops in the
3617                      block calculations (much slower)
3618 .    -mat_block_size - size of the blocks to use
3619 
3620    Level: intermediate
3621 
3622    Notes:
3623    The number of rows and columns must be divisible by blocksize.
3624 
3625    If the nnz parameter is given then the nz parameter is ignored
3626 
3627    A nonzero block is any block that as 1 or more nonzeros in it
3628 
3629    The block AIJ format is fully compatible with standard Fortran 77
3630    storage.  That is, the stored row and column indices can begin at
3631    either one (as in Fortran) or zero.  See the users' manual for details.
3632 
3633    Specify the preallocated storage with either nz or nnz (not both).
3634    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3635    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3636    matrices.
3637 
3638 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3639 @*/
3640 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3641 {
3642   PetscErrorCode ierr;
3643 
3644   PetscFunctionBegin;
3645   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3646   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3647   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3648   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3649   PetscFunctionReturn(0);
3650 }
3651 
3652 #undef __FUNCT__
3653 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3654 /*@C
3655    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3656    per row in the matrix. For good matrix assembly performance the
3657    user should preallocate the matrix storage by setting the parameter nz
3658    (or the array nnz).  By setting these parameters accurately, performance
3659    during matrix assembly can be increased by more than a factor of 50.
3660 
3661    Collective on MPI_Comm
3662 
3663    Input Parameters:
3664 +  A - the matrix
3665 .  bs - size of block
3666 .  nz - number of block nonzeros per block row (same for all rows)
3667 -  nnz - array containing the number of block nonzeros in the various block rows
3668          (possibly different for each block row) or PETSC_NULL
3669 
3670    Options Database Keys:
3671 .   -mat_no_unroll - uses code that does not unroll the loops in the
3672                      block calculations (much slower)
3673 .    -mat_block_size - size of the blocks to use
3674 
3675    Level: intermediate
3676 
3677    Notes:
3678    If the nnz parameter is given then the nz parameter is ignored
3679 
3680    You can call MatGetInfo() to get information on how effective the preallocation was;
3681    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3682    You can also run with the option -info and look for messages with the string
3683    malloc in them to see if additional memory allocation was needed.
3684 
3685    The block AIJ format is fully compatible with standard Fortran 77
3686    storage.  That is, the stored row and column indices can begin at
3687    either one (as in Fortran) or zero.  See the users' manual for details.
3688 
3689    Specify the preallocated storage with either nz or nnz (not both).
3690    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3691    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3692 
3693 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3694 @*/
3695 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3696 {
3697   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3698 
3699   PetscFunctionBegin;
3700   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3701   if (f) {
3702     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3703   }
3704   PetscFunctionReturn(0);
3705 }
3706 
3707 #undef __FUNCT__
3708 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3709 /*@C
3710    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3711    (the default sequential PETSc format).
3712 
3713    Collective on MPI_Comm
3714 
3715    Input Parameters:
3716 +  A - the matrix
3717 .  i - the indices into j for the start of each local row (starts with zero)
3718 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3719 -  v - optional values in the matrix
3720 
3721    Level: developer
3722 
3723 .keywords: matrix, aij, compressed row, sparse
3724 
3725 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3726 @*/
3727 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3728 {
3729   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3730 
3731   PetscFunctionBegin;
3732   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3733   if (f) {
3734     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3735   }
3736   PetscFunctionReturn(0);
3737 }
3738 
3739 
3740 #undef __FUNCT__
3741 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3742 /*@
3743      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3744               (upper triangular entries in CSR format) provided by the user.
3745 
3746      Collective on MPI_Comm
3747 
3748    Input Parameters:
3749 +  comm - must be an MPI communicator of size 1
3750 .  bs - size of block
3751 .  m - number of rows
3752 .  n - number of columns
3753 .  i - row indices
3754 .  j - column indices
3755 -  a - matrix values
3756 
3757    Output Parameter:
3758 .  mat - the matrix
3759 
3760    Level: intermediate
3761 
3762    Notes:
3763        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3764     once the matrix is destroyed
3765 
3766        You cannot set new nonzero locations into this matrix, that will generate an error.
3767 
3768        The i and j indices are 0 based
3769 
3770 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3771 
3772 @*/
3773 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3774 {
3775   PetscErrorCode ierr;
3776   PetscInt       ii;
3777   Mat_SeqBAIJ    *baij;
3778 
3779   PetscFunctionBegin;
3780   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3781   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3782 
3783   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3784   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3785   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3786   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3787   baij = (Mat_SeqBAIJ*)(*mat)->data;
3788   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3789   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3790 
3791   baij->i = i;
3792   baij->j = j;
3793   baij->a = a;
3794   baij->singlemalloc = PETSC_FALSE;
3795   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3796   baij->free_a       = PETSC_FALSE;
3797   baij->free_ij       = PETSC_FALSE;
3798 
3799   for (ii=0; ii<m; ii++) {
3800     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3801 #if defined(PETSC_USE_DEBUG)
3802     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3803 #endif
3804   }
3805 #if defined(PETSC_USE_DEBUG)
3806   for (ii=0; ii<baij->i[m]; ii++) {
3807     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3808     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3809   }
3810 #endif
3811 
3812   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3813   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3814   PetscFunctionReturn(0);
3815 }
3816