xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 71fd2e92b2fee25f70818ffc1dca9fc57f8584d7)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59     ierr = PetscLogObjectMemory(A,2*bs*bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 1:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 1*diag_offset[i];
68         diag[0]  = odiag[0];
69         mdiag[0] = odiag[0];
70         diag[0]  = 1.0 / (diag[0] + shift);
71         diag    += 1;
72         mdiag   += 1;
73       }
74       break;
75     case 2:
76       for (i=0; i<mbs; i++) {
77         odiag   = v + 4*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
80 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
81 	diag    += 4;
82 	mdiag   += 4;
83       }
84       break;
85     case 3:
86       for (i=0; i<mbs; i++) {
87         odiag    = v + 9*diag_offset[i];
88         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
89         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
90         diag[8]  = odiag[8];
91         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
92         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
93         mdiag[8] = odiag[8];
94 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
95 	diag    += 9;
96 	mdiag   += 9;
97       }
98       break;
99     case 4:
100       for (i=0; i<mbs; i++) {
101         odiag  = v + 16*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
105 	diag  += 16;
106 	mdiag += 16;
107       }
108       break;
109     case 5:
110       for (i=0; i<mbs; i++) {
111         odiag = v + 25*diag_offset[i];
112         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
113         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
114 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
115 	diag  += 25;
116 	mdiag += 25;
117       }
118       break;
119     case 6:
120       for (i=0; i<mbs; i++) {
121         odiag = v + 36*diag_offset[i];
122         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
123         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
124 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
125 	diag  += 36;
126 	mdiag += 36;
127       }
128       break;
129     default:
130       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 EXTERN_C_END
136 
137 #undef __FUNCT__
138 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
139 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
140 {
141   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
142   PetscScalar        *x,x1,s1;
143   const PetscScalar  *b;
144   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
145   PetscErrorCode     ierr;
146   PetscInt           m = a->mbs,i,i2,nz,j;
147   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
148 
149   PetscFunctionBegin;
150   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
151   its = its*lits;
152   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
153   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
154   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
155   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
156   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
157 
158   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
159 
160   diag  = a->diag;
161   idiag = a->idiag;
162   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
163   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
164 
165   if (flag & SOR_ZERO_INITIAL_GUESS) {
166     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
167       x[0] = b[0]*idiag[0];
168       i2     = 1;
169       idiag += 1;
170       for (i=1; i<m; i++) {
171         v     = aa + ai[i];
172         vi    = aj + ai[i];
173         nz    = diag[i] - ai[i];
174         s1    = b[i2];
175         for (j=0; j<nz; j++) {
176           s1 -= v[j]*x[vi[j]];
177         }
178         x[i2]   = idiag[0]*s1;
179         idiag   += 1;
180         i2      += 1;
181       }
182       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
183       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
184     }
185     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
186         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
187       i2    = 0;
188       mdiag = a->idiag+a->mbs;
189       for (i=0; i<m; i++) {
190         x1      = x[i2];
191         x[i2]   = mdiag[0]*x1;
192         mdiag  += 1;
193         i2     += 1;
194       }
195       ierr = PetscLogFlops(m);CHKERRQ(ierr);
196     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
197       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
198     }
199     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
200       idiag   = a->idiag+a->mbs - 1;
201       i2      = m - 1;
202       x1      = x[i2];
203       x[i2]   = idiag[0]*x1;
204       idiag -= 1;
205       i2    -= 1;
206       for (i=m-2; i>=0; i--) {
207         v     = aa + (diag[i]+1);
208         vi    = aj + diag[i] + 1;
209         nz    = ai[i+1] - diag[i] - 1;
210         s1    = x[i2];
211         for (j=0; j<nz; j++) {
212           s1 -= v[j]*x[vi[j]];
213         }
214         x[i2]   = idiag[0]*s1;
215         idiag   -= 1;
216         i2      -= 1;
217       }
218       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
219     }
220   } else {
221     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
222   }
223   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
224   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
225   PetscFunctionReturn(0);
226 }
227 
228 #undef __FUNCT__
229 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
230 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
231 {
232   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
233   PetscScalar        *x,x1,x2,s1,s2;
234   const PetscScalar  *b;
235   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
236   PetscErrorCode     ierr;
237   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
238   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
239 
240   PetscFunctionBegin;
241   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
242   its = its*lits;
243   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
244   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
245   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
246   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
247   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
248 
249   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
250 
251   diag  = a->diag;
252   idiag = a->idiag;
253   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
254   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
255 
256   if (flag & SOR_ZERO_INITIAL_GUESS) {
257     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
258       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
259       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
260       i2     = 2;
261       idiag += 4;
262       for (i=1; i<m; i++) {
263 	v     = aa + 4*ai[i];
264 	vi    = aj + ai[i];
265 	nz    = diag[i] - ai[i];
266 	s1    = b[i2]; s2 = b[i2+1];
267         for (j=0; j<nz; j++) {
268 	  idx  = 2*vi[j];
269           it   = 4*j;
270 	  x1   = x[idx]; x2 = x[1+idx];
271 	  s1  -= v[it]*x1 + v[it+2]*x2;
272 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
273 	}
274 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
275 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
276         idiag   += 4;
277         i2      += 2;
278       }
279       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
280       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
281     }
282     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
283         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
284       i2    = 0;
285       mdiag = a->idiag+4*a->mbs;
286       for (i=0; i<m; i++) {
287         x1      = x[i2]; x2 = x[i2+1];
288         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
289         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
290         mdiag  += 4;
291         i2     += 2;
292       }
293       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
294     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
295       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
296     }
297     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
298       idiag   = a->idiag+4*a->mbs - 4;
299       i2      = 2*m - 2;
300       x1      = x[i2]; x2 = x[i2+1];
301       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
302       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
303       idiag -= 4;
304       i2    -= 2;
305       for (i=m-2; i>=0; i--) {
306 	v     = aa + 4*(diag[i]+1);
307 	vi    = aj + diag[i] + 1;
308 	nz    = ai[i+1] - diag[i] - 1;
309 	s1    = x[i2]; s2 = x[i2+1];
310         for (j=0; j<nz; j++) {
311  	  idx  = 2*vi[j];
312           it   = 4*j;
313 	  x1   = x[idx]; x2 = x[1+idx];
314 	  s1  -= v[it]*x1 + v[it+2]*x2;
315 	  s2  -= v[it+1]*x1 + v[it+3]*x2;
316 	}
317 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
318 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
319         idiag   -= 4;
320         i2      -= 2;
321       }
322       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
323     }
324   } else {
325     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
326   }
327   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
328   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
329   PetscFunctionReturn(0);
330 }
331 
332 #undef __FUNCT__
333 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
334 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
335 {
336   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
337   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
338   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
339   const PetscScalar  *b;
340   PetscErrorCode     ierr;
341   PetscInt           m = a->mbs,i,i2,nz,idx;
342   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
343 
344   PetscFunctionBegin;
345   its = its*lits;
346   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
347   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
348   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
349   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
350   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
351   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
352 
353   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
354 
355   diag  = a->diag;
356   idiag = a->idiag;
357   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
358   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
359 
360   if (flag & SOR_ZERO_INITIAL_GUESS) {
361     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
362       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
363       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
364       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
365       i2     = 3;
366       idiag += 9;
367       for (i=1; i<m; i++) {
368         v     = aa + 9*ai[i];
369         vi    = aj + ai[i];
370         nz    = diag[i] - ai[i];
371         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
372         while (nz--) {
373           idx  = 3*(*vi++);
374           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
375           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
376           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
377           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
378           v   += 9;
379         }
380         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
381         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
382         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
383         idiag   += 9;
384         i2      += 3;
385       }
386       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
387       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
388     }
389     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
390         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
391       i2    = 0;
392       mdiag = a->idiag+9*a->mbs;
393       for (i=0; i<m; i++) {
394         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
396         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
397         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
398         mdiag  += 9;
399         i2     += 3;
400       }
401       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
402     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
404     }
405     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406       idiag   = a->idiag+9*a->mbs - 9;
407       i2      = 3*m - 3;
408       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
409       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
410       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
411       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
412       idiag -= 9;
413       i2    -= 3;
414       for (i=m-2; i>=0; i--) {
415         v     = aa + 9*(diag[i]+1);
416         vi    = aj + diag[i] + 1;
417         nz    = ai[i+1] - diag[i] - 1;
418         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
419         while (nz--) {
420           idx  = 3*(*vi++);
421           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
422           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
423           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
424           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
425           v   += 9;
426         }
427         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
428         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
429         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
430         idiag   -= 9;
431         i2      -= 3;
432       }
433       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
445 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
449   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
450   const PetscScalar  *b;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
457   its = its*lits;
458   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
459   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
460   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
461   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
462   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
463 
464   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
465 
466   diag  = a->diag;
467   idiag = a->idiag;
468   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
469   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
470 
471   if (flag & SOR_ZERO_INITIAL_GUESS) {
472     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
473       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
474       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
475       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
476       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
477       i2     = 4;
478       idiag += 16;
479       for (i=1; i<m; i++) {
480 	v     = aa + 16*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
484 	while (nz--) {
485 	  idx  = 4*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
487 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
488 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
489 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
490 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
491 	  v   += 16;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
494 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
495 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
496 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
497         idiag   += 16;
498         i2      += 4;
499       }
500       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
501       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
502     }
503     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
504         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
505       i2    = 0;
506       mdiag = a->idiag+16*a->mbs;
507       for (i=0; i<m; i++) {
508         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
509         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
510         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
511         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
512         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
513         mdiag  += 16;
514         i2     += 4;
515       }
516       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
517     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
518       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
519     }
520     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
521       idiag   = a->idiag+16*a->mbs - 16;
522       i2      = 4*m - 4;
523       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
524       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
525       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
526       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
527       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
528       idiag -= 16;
529       i2    -= 4;
530       for (i=m-2; i>=0; i--) {
531 	v     = aa + 16*(diag[i]+1);
532 	vi    = aj + diag[i] + 1;
533 	nz    = ai[i+1] - diag[i] - 1;
534 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
535 	while (nz--) {
536 	  idx  = 4*(*vi++);
537 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
538 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
539 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
540 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
541 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
542 	  v   += 16;
543 	}
544 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
545 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
546 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
547 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
548         idiag   -= 16;
549         i2      -= 4;
550       }
551       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
552     }
553   } else {
554     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
555   }
556   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
557   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
558   PetscFunctionReturn(0);
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
563 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
564 {
565   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
566   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
567   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
568   const PetscScalar  *b;
569   PetscErrorCode     ierr;
570   PetscInt           m = a->mbs,i,i2,nz,idx;
571   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
572 
573   PetscFunctionBegin;
574   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
575   its = its*lits;
576   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
577   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
578   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
579   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
580   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
581 
582   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
583 
584   diag  = a->diag;
585   idiag = a->idiag;
586   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
587   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
588 
589   if (flag & SOR_ZERO_INITIAL_GUESS) {
590     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
591       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
592       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
593       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
594       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
595       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
596       i2     = 5;
597       idiag += 25;
598       for (i=1; i<m; i++) {
599 	v     = aa + 25*ai[i];
600 	vi    = aj + ai[i];
601 	nz    = diag[i] - ai[i];
602 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
603 	while (nz--) {
604 	  idx  = 5*(*vi++);
605 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
606 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
607 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
608 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
609 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
610 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
611 	  v   += 25;
612 	}
613 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
614 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
615 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
616 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
617 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
618         idiag   += 25;
619         i2      += 5;
620       }
621       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
622       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
623     }
624     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
625         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
626       i2    = 0;
627       mdiag = a->idiag+25*a->mbs;
628       for (i=0; i<m; i++) {
629         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
630         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
631         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
632         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
633         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
634         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
635         mdiag  += 25;
636         i2     += 5;
637       }
638       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
639     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
640       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
641     }
642     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
643       idiag   = a->idiag+25*a->mbs - 25;
644       i2      = 5*m - 5;
645       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
646       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
647       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
648       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
649       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
650       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
651       idiag -= 25;
652       i2    -= 5;
653       for (i=m-2; i>=0; i--) {
654 	v     = aa + 25*(diag[i]+1);
655 	vi    = aj + diag[i] + 1;
656 	nz    = ai[i+1] - diag[i] - 1;
657 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
658 	while (nz--) {
659 	  idx  = 5*(*vi++);
660 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
661 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
662 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
663 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
664 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
665 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
666 	  v   += 25;
667 	}
668 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
669 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
670 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
671 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
672 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
673         idiag   -= 25;
674         i2      -= 5;
675       }
676       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
677     }
678   } else {
679     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
680   }
681   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
682   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
683   PetscFunctionReturn(0);
684 }
685 
686 #undef __FUNCT__
687 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
688 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
689 {
690   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
691   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
692   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
693   const PetscScalar  *b;
694   PetscErrorCode     ierr;
695   PetscInt           m = a->mbs,i,i2,nz,idx;
696   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
697 
698   PetscFunctionBegin;
699   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
700   its = its*lits;
701   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
702   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
703   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
704   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
705   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
706 
707   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
708 
709   diag  = a->diag;
710   idiag = a->idiag;
711   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
712   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
713 
714   if (flag & SOR_ZERO_INITIAL_GUESS) {
715     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
716       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
717       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
718       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
719       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
720       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
721       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
722       i2     = 6;
723       idiag += 36;
724       for (i=1; i<m; i++) {
725         v     = aa + 36*ai[i];
726         vi    = aj + ai[i];
727         nz    = diag[i] - ai[i];
728         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
729         while (nz--) {
730           idx  = 6*(*vi++);
731           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
732           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
733           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
734           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
735           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
736           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
737           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
738           v   += 36;
739         }
740         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
741         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
742         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
743         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
744         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
745         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
746         idiag   += 36;
747         i2      += 6;
748       }
749       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
750       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
751     }
752     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
753         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
754       i2    = 0;
755       mdiag = a->idiag+36*a->mbs;
756       for (i=0; i<m; i++) {
757         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
758         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
759         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
760         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
761         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
762         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
763         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
764         mdiag  += 36;
765         i2     += 6;
766       }
767       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
768     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
769       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
770     }
771     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
772       idiag   = a->idiag+36*a->mbs - 36;
773       i2      = 6*m - 6;
774       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
775       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
776       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
777       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
778       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
779       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
780       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
781       idiag -= 36;
782       i2    -= 6;
783       for (i=m-2; i>=0; i--) {
784         v     = aa + 36*(diag[i]+1);
785         vi    = aj + diag[i] + 1;
786         nz    = ai[i+1] - diag[i] - 1;
787         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
788         while (nz--) {
789           idx  = 6*(*vi++);
790           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
791           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
792           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
793           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
794           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
795           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
796           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
797           v   += 36;
798         }
799         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
800         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
801         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
802         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
803         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
804         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
805         idiag   -= 36;
806         i2      -= 6;
807       }
808       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
809     }
810   } else {
811     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
812   }
813   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
814   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
815   PetscFunctionReturn(0);
816 }
817 
818 #undef __FUNCT__
819 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
820 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
821 {
822   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
823   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
824   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
825   const PetscScalar  *b;
826   PetscErrorCode     ierr;
827   PetscInt           m = a->mbs,i,i2,nz,idx;
828   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
829 
830   PetscFunctionBegin;
831   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
832   its = its*lits;
833   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
834   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
835   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
836   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
837   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
838 
839   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
840 
841   diag  = a->diag;
842   idiag = a->idiag;
843   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
844   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
845 
846   if (flag & SOR_ZERO_INITIAL_GUESS) {
847     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
848       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
849       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
850       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
851       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
852       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
853       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
854       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
855       i2     = 7;
856       idiag += 49;
857       for (i=1; i<m; i++) {
858         v     = aa + 49*ai[i];
859         vi    = aj + ai[i];
860         nz    = diag[i] - ai[i];
861         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
862         while (nz--) {
863           idx  = 7*(*vi++);
864           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
865           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
866           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
867           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
868           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
869           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
870           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
871           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
872           v   += 49;
873         }
874         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
875         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
876         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
877         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
878         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
879         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
880         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
881         idiag   += 49;
882         i2      += 7;
883       }
884       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
885       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
886     }
887     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
888         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
889       i2    = 0;
890       mdiag = a->idiag+49*a->mbs;
891       for (i=0; i<m; i++) {
892         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
893         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
894         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
895         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
896         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
897         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
898         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
899         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
900         mdiag  += 36;
901         i2     += 6;
902       }
903       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
904     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
905       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
906     }
907     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
908       idiag   = a->idiag+49*a->mbs - 49;
909       i2      = 7*m - 7;
910       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
911       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
912       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
913       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
914       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
915       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
916       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
917       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
918       idiag -= 49;
919       i2    -= 7;
920       for (i=m-2; i>=0; i--) {
921         v     = aa + 49*(diag[i]+1);
922         vi    = aj + diag[i] + 1;
923         nz    = ai[i+1] - diag[i] - 1;
924         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
925         while (nz--) {
926           idx  = 7*(*vi++);
927           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
928           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
929           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
930           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
931           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
932           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
933           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
934           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
935           v   += 49;
936         }
937         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
938         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
939         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
940         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
941         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
942         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
943         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
944         idiag   -= 49;
945         i2      -= 7;
946       }
947       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
948     }
949   } else {
950     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
951   }
952   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
953   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
954   PetscFunctionReturn(0);
955 }
956 
957 /*
958     Special version for direct calls from Fortran (Used in PETSc-fun3d)
959 */
960 #if defined(PETSC_HAVE_FORTRAN_CAPS)
961 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
962 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
963 #define matsetvaluesblocked4_ matsetvaluesblocked4
964 #endif
965 
966 EXTERN_C_BEGIN
967 #undef __FUNCT__
968 #define __FUNCT__ "matsetvaluesblocked4_"
969 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
970 {
971   Mat               A = *AA;
972   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
973   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
974   PetscInt          *ai=a->i,*ailen=a->ilen;
975   PetscInt          *aj=a->j,stepval,lastcol = -1;
976   const PetscScalar *value = v;
977   MatScalar         *ap,*aa = a->a,*bap;
978 
979   PetscFunctionBegin;
980   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
981   stepval = (n-1)*4;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k];
984     rp   = aj + ai[row];
985     ap   = aa + 16*ai[row];
986     nrow = ailen[row];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col = in[l];
991       if (col <= lastcol) low = 0; else high = nrow;
992       lastcol = col;
993       value = v + k*(stepval+4 + l)*4;
994       while (high-low > 7) {
995         t = (low+high)/2;
996         if (rp[t] > col) high = t;
997         else             low  = t;
998       }
999       for (i=low; i<high; i++) {
1000         if (rp[i] > col) break;
1001         if (rp[i] == col) {
1002           bap  = ap +  16*i;
1003           for (ii=0; ii<4; ii++,value+=stepval) {
1004             for (jj=ii; jj<16; jj+=4) {
1005               bap[jj] += *value++;
1006             }
1007           }
1008           goto noinsert2;
1009         }
1010       }
1011       N = nrow++ - 1;
1012       high++; /* added new column index thus must search to one higher than before */
1013       /* shift up all the later entries in this row */
1014       for (ii=N; ii>=i; ii--) {
1015         rp[ii+1] = rp[ii];
1016         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1017       }
1018       if (N >= i) {
1019         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1020       }
1021       rp[i] = col;
1022       bap   = ap +  16*i;
1023       for (ii=0; ii<4; ii++,value+=stepval) {
1024         for (jj=ii; jj<16; jj+=4) {
1025           bap[jj] = *value++;
1026         }
1027       }
1028       noinsert2:;
1029       low = i;
1030     }
1031     ailen[row] = nrow;
1032   }
1033   PetscFunctionReturnVoid();
1034 }
1035 EXTERN_C_END
1036 
1037 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1038 #define matsetvalues4_ MATSETVALUES4
1039 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1040 #define matsetvalues4_ matsetvalues4
1041 #endif
1042 
1043 EXTERN_C_BEGIN
1044 #undef __FUNCT__
1045 #define __FUNCT__ "MatSetValues4_"
1046 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1047 {
1048   Mat         A = *AA;
1049   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1050   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1051   PetscInt    *ai=a->i,*ailen=a->ilen;
1052   PetscInt    *aj=a->j,brow,bcol;
1053   PetscInt    ridx,cidx,lastcol = -1;
1054   MatScalar   *ap,value,*aa=a->a,*bap;
1055 
1056   PetscFunctionBegin;
1057   for (k=0; k<m; k++) { /* loop over added rows */
1058     row  = im[k]; brow = row/4;
1059     rp   = aj + ai[brow];
1060     ap   = aa + 16*ai[brow];
1061     nrow = ailen[brow];
1062     low  = 0;
1063     high = nrow;
1064     for (l=0; l<n; l++) { /* loop over added columns */
1065       col = in[l]; bcol = col/4;
1066       ridx = row % 4; cidx = col % 4;
1067       value = v[l + k*n];
1068       if (col <= lastcol) low = 0; else high = nrow;
1069       lastcol = col;
1070       while (high-low > 7) {
1071         t = (low+high)/2;
1072         if (rp[t] > bcol) high = t;
1073         else              low  = t;
1074       }
1075       for (i=low; i<high; i++) {
1076         if (rp[i] > bcol) break;
1077         if (rp[i] == bcol) {
1078           bap  = ap +  16*i + 4*cidx + ridx;
1079           *bap += value;
1080           goto noinsert1;
1081         }
1082       }
1083       N = nrow++ - 1;
1084       high++; /* added new column thus must search to one higher than before */
1085       /* shift up all the later entries in this row */
1086       for (ii=N; ii>=i; ii--) {
1087         rp[ii+1] = rp[ii];
1088         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1089       }
1090       if (N>=i) {
1091         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1092       }
1093       rp[i]                    = bcol;
1094       ap[16*i + 4*cidx + ridx] = value;
1095       noinsert1:;
1096       low = i;
1097     }
1098     ailen[brow] = nrow;
1099   }
1100   PetscFunctionReturnVoid();
1101 }
1102 EXTERN_C_END
1103 
1104 /*
1105      Checks for missing diagonals
1106 */
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1109 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1110 {
1111   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113   PetscInt       *diag,*jj = a->j,i;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1117   *missing = PETSC_FALSE;
1118   if (A->rmap->n > 0 && !jj) {
1119     *missing  = PETSC_TRUE;
1120     if (d) *d = 0;
1121     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1122   } else {
1123     diag     = a->diag;
1124     for (i=0; i<a->mbs; i++) {
1125       if (jj[diag[i]] != i) {
1126         *missing  = PETSC_TRUE;
1127         if (d) *d = i;
1128         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1129       }
1130     }
1131   }
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1137 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1138 {
1139   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1140   PetscErrorCode ierr;
1141   PetscInt       i,j,m = a->mbs;
1142 
1143   PetscFunctionBegin;
1144   if (!a->diag) {
1145     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1146     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1147     a->free_diag = PETSC_TRUE;
1148   }
1149   for (i=0; i<m; i++) {
1150     a->diag[i] = a->i[i+1];
1151     for (j=a->i[i]; j<a->i[i+1]; j++) {
1152       if (a->j[j] == i) {
1153         a->diag[i] = j;
1154         break;
1155       }
1156     }
1157   }
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 
1162 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1163 
1164 #undef __FUNCT__
1165 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1166 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1167 {
1168   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1169   PetscErrorCode ierr;
1170   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1171   PetscInt       *tia, *tja;
1172 
1173   PetscFunctionBegin;
1174   *nn = n;
1175   if (!ia) PetscFunctionReturn(0);
1176   if (symmetric) {
1177     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1178   } else {
1179     tia = a->i; tja = a->j;
1180   }
1181 
1182   if (!blockcompressed && bs > 1) {
1183     (*nn) *= bs;
1184     nbs    = bs;
1185     /* malloc & create the natural set of indices */
1186     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1187     if (n) {
1188       (*ia)[0] = 0;
1189       for (j=1; j<bs; j++) {
1190         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1191       }
1192     }
1193 
1194     for (i=1; i<n; i++) {
1195       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1196       for (j=1; j<bs; j++) {
1197         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1198       }
1199     }
1200     if (n) {
1201       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1202     }
1203 
1204     if (ja) {
1205       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1206       cnt = 0;
1207       for (i=0; i<n; i++) {
1208         for (j=0; j<bs; j++) {
1209           for (k=tia[i]; k<tia[i+1]; k++) {
1210             for (l=0; l<bs; l++) {
1211               (*ja)[cnt++] = bs*tja[k] + l;
1212 	    }
1213           }
1214         }
1215       }
1216     }
1217 
1218     n     *= bs;
1219     nz *= bs*bs;
1220     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1221       ierr = PetscFree(tia);CHKERRQ(ierr);
1222       ierr = PetscFree(tja);CHKERRQ(ierr);
1223     }
1224   } else if (oshift == 1) {
1225     if (symmetric) {
1226       PetscInt nz = tia[A->rmap->n/bs];
1227       /*  add 1 to i and j indices */
1228       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1229       *ia = tia;
1230       if (ja) {
1231 	for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1232         *ja = tja;
1233       }
1234     } else {
1235       PetscInt nz = a->i[A->rmap->n/bs];
1236       /* malloc space and  add 1 to i and j indices */
1237       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1238       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1239       if (ja) {
1240 	ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1241 	for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1242       }
1243     }
1244   } else {
1245     *ia = tia;
1246     if (ja) *ja = tja;
1247   }
1248 
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1254 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1255 {
1256   PetscErrorCode ierr;
1257 
1258   PetscFunctionBegin;
1259   if (!ia) PetscFunctionReturn(0);
1260   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1261     ierr = PetscFree(*ia);CHKERRQ(ierr);
1262     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1263   }
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 #undef __FUNCT__
1268 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1269 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1270 {
1271   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1272   PetscErrorCode ierr;
1273 
1274   PetscFunctionBegin;
1275 #if defined(PETSC_USE_LOG)
1276   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1277 #endif
1278   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1279   if (a->row) {
1280     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1281   }
1282   if (a->col) {
1283     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1284   }
1285   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1286   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1287   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1288   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1289   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1290   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1291   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1292   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1293   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1294 
1295   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1296   if (a->parent) {ierr = MatDestroy(a->parent);CHKERRQ(ierr);}
1297   ierr = PetscFree(a);CHKERRQ(ierr);
1298 
1299   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1300   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1301   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1302   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1303   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1304   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1305   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1306   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1307   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1313 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1314 {
1315   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   switch (op) {
1320   case MAT_ROW_ORIENTED:
1321     a->roworiented    = flg;
1322     break;
1323   case MAT_KEEP_NONZERO_PATTERN:
1324     a->keepnonzeropattern = flg;
1325     break;
1326   case MAT_NEW_NONZERO_LOCATIONS:
1327     a->nonew          = (flg ? 0 : 1);
1328     break;
1329   case MAT_NEW_NONZERO_LOCATION_ERR:
1330     a->nonew          = (flg ? -1 : 0);
1331     break;
1332   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1333     a->nonew          = (flg ? -2 : 0);
1334     break;
1335   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1336     a->nounused       = (flg ? -1 : 0);
1337     break;
1338   case MAT_NEW_DIAGONALS:
1339   case MAT_IGNORE_OFF_PROC_ENTRIES:
1340   case MAT_USE_HASH_TABLE:
1341     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1342     break;
1343   case MAT_SYMMETRIC:
1344   case MAT_STRUCTURALLY_SYMMETRIC:
1345   case MAT_HERMITIAN:
1346   case MAT_SYMMETRY_ETERNAL:
1347     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1348     break;
1349   default:
1350     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1351   }
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1357 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1358 {
1359   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1360   PetscErrorCode ierr;
1361   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1362   MatScalar      *aa,*aa_i;
1363   PetscScalar    *v_i;
1364 
1365   PetscFunctionBegin;
1366   bs  = A->rmap->bs;
1367   ai  = a->i;
1368   aj  = a->j;
1369   aa  = a->a;
1370   bs2 = a->bs2;
1371 
1372   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1373 
1374   bn  = row/bs;   /* Block number */
1375   bp  = row % bs; /* Block Position */
1376   M   = ai[bn+1] - ai[bn];
1377   *nz = bs*M;
1378 
1379   if (v) {
1380     *v = 0;
1381     if (*nz) {
1382       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1383       for (i=0; i<M; i++) { /* for each block in the block row */
1384         v_i  = *v + i*bs;
1385         aa_i = aa + bs2*(ai[bn] + i);
1386         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1387       }
1388     }
1389   }
1390 
1391   if (idx) {
1392     *idx = 0;
1393     if (*nz) {
1394       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1395       for (i=0; i<M; i++) { /* for each block in the block row */
1396         idx_i = *idx + i*bs;
1397         itmp  = bs*aj[ai[bn] + i];
1398         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1399       }
1400     }
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1407 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1408 {
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1413   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1419 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1420 {
1421   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1422   Mat            C;
1423   PetscErrorCode ierr;
1424   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1425   PetscInt       *rows,*cols,bs2=a->bs2;
1426   MatScalar      *array;
1427 
1428   PetscFunctionBegin;
1429   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1430   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1431     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1432     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1433 
1434     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1435     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1436     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1437     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1438     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1439     ierr = PetscFree(col);CHKERRQ(ierr);
1440   } else {
1441     C = *B;
1442   }
1443 
1444   array = a->a;
1445   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1446   cols = rows + bs;
1447   for (i=0; i<mbs; i++) {
1448     cols[0] = i*bs;
1449     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1450     len = ai[i+1] - ai[i];
1451     for (j=0; j<len; j++) {
1452       rows[0] = (*aj++)*bs;
1453       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1454       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1455       array += bs2;
1456     }
1457   }
1458   ierr = PetscFree(rows);CHKERRQ(ierr);
1459 
1460   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1461   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1462 
1463   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1464     *B = C;
1465   } else {
1466     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1467   }
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1473 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1474 {
1475   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1476   PetscErrorCode ierr;
1477   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1478   int            fd;
1479   PetscScalar    *aa;
1480   FILE           *file;
1481 
1482   PetscFunctionBegin;
1483   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1484   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1485   col_lens[0] = MAT_FILE_COOKIE;
1486 
1487   col_lens[1] = A->rmap->N;
1488   col_lens[2] = A->cmap->n;
1489   col_lens[3] = a->nz*bs2;
1490 
1491   /* store lengths of each row and write (including header) to file */
1492   count = 0;
1493   for (i=0; i<a->mbs; i++) {
1494     for (j=0; j<bs; j++) {
1495       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1496     }
1497   }
1498   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1499   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1500 
1501   /* store column indices (zero start index) */
1502   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1503   count = 0;
1504   for (i=0; i<a->mbs; i++) {
1505     for (j=0; j<bs; j++) {
1506       for (k=a->i[i]; k<a->i[i+1]; k++) {
1507         for (l=0; l<bs; l++) {
1508           jj[count++] = bs*a->j[k] + l;
1509         }
1510       }
1511     }
1512   }
1513   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1514   ierr = PetscFree(jj);CHKERRQ(ierr);
1515 
1516   /* store nonzero values */
1517   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1518   count = 0;
1519   for (i=0; i<a->mbs; i++) {
1520     for (j=0; j<bs; j++) {
1521       for (k=a->i[i]; k<a->i[i+1]; k++) {
1522         for (l=0; l<bs; l++) {
1523           aa[count++] = a->a[bs2*k + l*bs + j];
1524         }
1525       }
1526     }
1527   }
1528   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1529   ierr = PetscFree(aa);CHKERRQ(ierr);
1530 
1531   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1532   if (file) {
1533     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1534   }
1535   PetscFunctionReturn(0);
1536 }
1537 
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1540 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1541 {
1542   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1543   PetscErrorCode    ierr;
1544   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1545   PetscViewerFormat format;
1546 
1547   PetscFunctionBegin;
1548   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1549   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1550     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1551   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1552     Mat aij;
1553     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1554     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1555     ierr = MatDestroy(aij);CHKERRQ(ierr);
1556   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1557      PetscFunctionReturn(0);
1558   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1560     for (i=0; i<a->mbs; i++) {
1561       for (j=0; j<bs; j++) {
1562         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1563         for (k=a->i[i]; k<a->i[i+1]; k++) {
1564           for (l=0; l<bs; l++) {
1565 #if defined(PETSC_USE_COMPLEX)
1566             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1568                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1571                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1572             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1573               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1574             }
1575 #else
1576             if (a->a[bs2*k + l*bs + j] != 0.0) {
1577               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1578             }
1579 #endif
1580           }
1581         }
1582         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1583       }
1584     }
1585     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1586   } else {
1587     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1588     for (i=0; i<a->mbs; i++) {
1589       for (j=0; j<bs; j++) {
1590         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1591         for (k=a->i[i]; k<a->i[i+1]; k++) {
1592           for (l=0; l<bs; l++) {
1593 #if defined(PETSC_USE_COMPLEX)
1594             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1595               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1596                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1597             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1598               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1599                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1600             } else {
1601               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1602             }
1603 #else
1604             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1605 #endif
1606           }
1607         }
1608         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1609       }
1610     }
1611     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1612   }
1613   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1614   PetscFunctionReturn(0);
1615 }
1616 
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1619 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1620 {
1621   Mat            A = (Mat) Aa;
1622   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1623   PetscErrorCode ierr;
1624   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1625   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1626   MatScalar      *aa;
1627   PetscViewer    viewer;
1628 
1629   PetscFunctionBegin;
1630 
1631   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1632   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1633 
1634   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1635 
1636   /* loop over matrix elements drawing boxes */
1637   color = PETSC_DRAW_BLUE;
1638   for (i=0,row=0; i<mbs; i++,row+=bs) {
1639     for (j=a->i[i]; j<a->i[i+1]; j++) {
1640       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1641       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1642       aa = a->a + j*bs2;
1643       for (k=0; k<bs; k++) {
1644         for (l=0; l<bs; l++) {
1645           if (PetscRealPart(*aa++) >=  0.) continue;
1646           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1647         }
1648       }
1649     }
1650   }
1651   color = PETSC_DRAW_CYAN;
1652   for (i=0,row=0; i<mbs; i++,row+=bs) {
1653     for (j=a->i[i]; j<a->i[i+1]; j++) {
1654       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1655       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1656       aa = a->a + j*bs2;
1657       for (k=0; k<bs; k++) {
1658         for (l=0; l<bs; l++) {
1659           if (PetscRealPart(*aa++) != 0.) continue;
1660           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1661         }
1662       }
1663     }
1664   }
1665 
1666   color = PETSC_DRAW_RED;
1667   for (i=0,row=0; i<mbs; i++,row+=bs) {
1668     for (j=a->i[i]; j<a->i[i+1]; j++) {
1669       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671       aa = a->a + j*bs2;
1672       for (k=0; k<bs; k++) {
1673         for (l=0; l<bs; l++) {
1674           if (PetscRealPart(*aa++) <= 0.) continue;
1675           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1676         }
1677       }
1678     }
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1685 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscReal      xl,yl,xr,yr,w,h;
1689   PetscDraw      draw;
1690   PetscTruth     isnull;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1696 
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr += w;    yr += h;  xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1702   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 #undef __FUNCT__
1707 #define __FUNCT__ "MatView_SeqBAIJ"
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscTruth     iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii){
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1735 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1736 {
1737   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1738   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1739   PetscInt    *ai = a->i,*ailen = a->ilen;
1740   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1741   MatScalar   *ap,*aa = a->a;
1742 
1743   PetscFunctionBegin;
1744   for (k=0; k<m; k++) { /* loop over rows */
1745     row  = im[k]; brow = row/bs;
1746     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1747     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1748     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1749     nrow = ailen[brow];
1750     for (l=0; l<n; l++) { /* loop over columns */
1751       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1752       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1753       col  = in[l] ;
1754       bcol = col/bs;
1755       cidx = col%bs;
1756       ridx = row%bs;
1757       high = nrow;
1758       low  = 0; /* assume unsorted */
1759       while (high-low > 5) {
1760         t = (low+high)/2;
1761         if (rp[t] > bcol) high = t;
1762         else             low  = t;
1763       }
1764       for (i=low; i<high; i++) {
1765         if (rp[i] > bcol) break;
1766         if (rp[i] == bcol) {
1767           *v++ = ap[bs2*i+bs*cidx+ridx];
1768           goto finished;
1769         }
1770       }
1771       *v++ = 0.0;
1772       finished:;
1773     }
1774   }
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #define CHUNKSIZE 10
1779 #undef __FUNCT__
1780 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1781 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1782 {
1783   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1784   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1785   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1786   PetscErrorCode    ierr;
1787   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1788   PetscTruth        roworiented=a->roworiented;
1789   const PetscScalar *value = v;
1790   MatScalar         *ap,*aa = a->a,*bap;
1791 
1792   PetscFunctionBegin;
1793   if (roworiented) {
1794     stepval = (n-1)*bs;
1795   } else {
1796     stepval = (m-1)*bs;
1797   }
1798   for (k=0; k<m; k++) { /* loop over added rows */
1799     row  = im[k];
1800     if (row < 0) continue;
1801 #if defined(PETSC_USE_DEBUG)
1802     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1803 #endif
1804     rp   = aj + ai[row];
1805     ap   = aa + bs2*ai[row];
1806     rmax = imax[row];
1807     nrow = ailen[row];
1808     low  = 0;
1809     high = nrow;
1810     for (l=0; l<n; l++) { /* loop over added columns */
1811       if (in[l] < 0) continue;
1812 #if defined(PETSC_USE_DEBUG)
1813       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1814 #endif
1815       col = in[l];
1816       if (roworiented) {
1817         value = v + k*(stepval+bs)*bs + l*bs;
1818       } else {
1819         value = v + l*(stepval+bs)*bs + k*bs;
1820       }
1821       if (col <= lastcol) low = 0; else high = nrow;
1822       lastcol = col;
1823       while (high-low > 7) {
1824         t = (low+high)/2;
1825         if (rp[t] > col) high = t;
1826         else             low  = t;
1827       }
1828       for (i=low; i<high; i++) {
1829         if (rp[i] > col) break;
1830         if (rp[i] == col) {
1831           bap  = ap +  bs2*i;
1832           if (roworiented) {
1833             if (is == ADD_VALUES) {
1834               for (ii=0; ii<bs; ii++,value+=stepval) {
1835                 for (jj=ii; jj<bs2; jj+=bs) {
1836                   bap[jj] += *value++;
1837                 }
1838               }
1839             } else {
1840               for (ii=0; ii<bs; ii++,value+=stepval) {
1841                 for (jj=ii; jj<bs2; jj+=bs) {
1842                   bap[jj] = *value++;
1843                 }
1844               }
1845             }
1846           } else {
1847             if (is == ADD_VALUES) {
1848               for (ii=0; ii<bs; ii++,value+=stepval) {
1849                 for (jj=0; jj<bs; jj++) {
1850                   *bap++ += *value++;
1851                 }
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   *bap++  = *value++;
1857                 }
1858               }
1859             }
1860           }
1861           goto noinsert2;
1862         }
1863       }
1864       if (nonew == 1) goto noinsert2;
1865       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1866       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       N = nrow++ - 1; high++;
1868       /* shift up all the later entries in this row */
1869       for (ii=N; ii>=i; ii--) {
1870         rp[ii+1] = rp[ii];
1871         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1872       }
1873       if (N >= i) {
1874         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875       }
1876       rp[i] = col;
1877       bap   = ap +  bs2*i;
1878       if (roworiented) {
1879         for (ii=0; ii<bs; ii++,value+=stepval) {
1880           for (jj=ii; jj<bs2; jj+=bs) {
1881             bap[jj] = *value++;
1882           }
1883         }
1884       } else {
1885         for (ii=0; ii<bs; ii++,value+=stepval) {
1886           for (jj=0; jj<bs; jj++) {
1887             *bap++  = *value++;
1888           }
1889         }
1890       }
1891       noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax   = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1921       N = ailen[i];
1922       for (j=0; j<N; j++) {
1923         ip[j-fshift] = ip[j];
1924         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1925       }
1926     }
1927     ai[i] = ai[i-1] + ailen[i-1];
1928   }
1929   if (mbs) {
1930     fshift += imax[mbs-1] - ailen[mbs-1];
1931     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1932   }
1933   /* reset ilen and imax for each row */
1934   for (i=0; i<mbs; i++) {
1935     ailen[i] = imax[i] = ai[i+1] - ai[i];
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) {
1947     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1948   }
1949   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1950   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1951   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1952   a->reallocs          = 0;
1953   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1954 
1955   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1956   if (a->compressedrow.use){
1957     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959 
1960   A->same_nonzero = PETSC_TRUE;
1961   PetscFunctionReturn(0);
1962 }
1963 
1964 /*
1965    This function returns an array of flags which indicate the locations of contiguous
1966    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1967    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1968    Assume: sizes should be long enough to hold all the values.
1969 */
1970 #undef __FUNCT__
1971 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt   i,j,k,row;
1975   PetscTruth flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i+= bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
2010 {
2011   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode ierr;
2013   PetscInt       i,j,k,count,*rows;
2014   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar    zero = 0.0;
2016   MatScalar      *aa;
2017 
2018   PetscFunctionBegin;
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2022   sizes = rows + is_n;
2023 
2024   /* copy IS values to rows, and sort them */
2025   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2026   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2027   if (baij->keepnonzeropattern) {
2028     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2029     bs_max = is_n;
2030     A->same_nonzero = PETSC_TRUE;
2031   } else {
2032     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2033     A->same_nonzero = PETSC_FALSE;
2034   }
2035 
2036   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2037     row   = rows[j];
2038     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2039     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2040     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2041     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2042       if (diag != 0.0) {
2043         if (baij->ilen[row/bs] > 0) {
2044           baij->ilen[row/bs]       = 1;
2045           baij->j[baij->i[row/bs]] = row/bs;
2046           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2047         }
2048         /* Now insert all the diagonal values for this bs */
2049         for (k=0; k<bs; k++) {
2050           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051         }
2052       } else { /* (diag == 0.0) */
2053         baij->ilen[row/bs] = 0;
2054       } /* end (diag == 0.0) */
2055     } else { /* (sizes[i] != bs) */
2056 #if defined (PETSC_USE_DEBUG)
2057       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2058 #endif
2059       for (k=0; k<count; k++) {
2060         aa[0] =  zero;
2061         aa    += bs;
2062       }
2063       if (diag != 0.0) {
2064         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2065       }
2066     }
2067   }
2068 
2069   ierr = PetscFree(rows);CHKERRQ(ierr);
2070   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2071   PetscFunctionReturn(0);
2072 }
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2076 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2077 {
2078   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2079   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2080   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2081   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2082   PetscErrorCode ierr;
2083   PetscInt       ridx,cidx,bs2=a->bs2;
2084   PetscTruth     roworiented=a->roworiented;
2085   MatScalar      *ap,value,*aa=a->a,*bap;
2086 
2087   PetscFunctionBegin;
2088   if (v) PetscValidScalarPointer(v,6);
2089   for (k=0; k<m; k++) { /* loop over added rows */
2090     row  = im[k];
2091     brow = row/bs;
2092     if (row < 0) continue;
2093 #if defined(PETSC_USE_DEBUG)
2094     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2095 #endif
2096     rp   = aj + ai[brow];
2097     ap   = aa + bs2*ai[brow];
2098     rmax = imax[brow];
2099     nrow = ailen[brow];
2100     low  = 0;
2101     high = nrow;
2102     for (l=0; l<n; l++) { /* loop over added columns */
2103       if (in[l] < 0) continue;
2104 #if defined(PETSC_USE_DEBUG)
2105       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2106 #endif
2107       col = in[l]; bcol = col/bs;
2108       ridx = row % bs; cidx = col % bs;
2109       if (roworiented) {
2110         value = v[l + k*n];
2111       } else {
2112         value = v[k + l*m];
2113       }
2114       if (col <= lastcol) low = 0; else high = nrow;
2115       lastcol = col;
2116       while (high-low > 7) {
2117         t = (low+high)/2;
2118         if (rp[t] > bcol) high = t;
2119         else              low  = t;
2120       }
2121       for (i=low; i<high; i++) {
2122         if (rp[i] > bcol) break;
2123         if (rp[i] == bcol) {
2124           bap  = ap +  bs2*i + bs*cidx + ridx;
2125           if (is == ADD_VALUES) *bap += value;
2126           else                  *bap  = value;
2127           goto noinsert1;
2128         }
2129       }
2130       if (nonew == 1) goto noinsert1;
2131       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2132       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2133       N = nrow++ - 1; high++;
2134       /* shift up all the later entries in this row */
2135       for (ii=N; ii>=i; ii--) {
2136         rp[ii+1] = rp[ii];
2137         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2138       }
2139       if (N>=i) {
2140         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2141       }
2142       rp[i]                      = bcol;
2143       ap[bs2*i + bs*cidx + ridx] = value;
2144       a->nz++;
2145       noinsert1:;
2146       low = i;
2147     }
2148     ailen[brow] = nrow;
2149   }
2150   A->same_nonzero = PETSC_FALSE;
2151   PetscFunctionReturn(0);
2152 }
2153 
2154 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2155 
2156 #undef __FUNCT__
2157 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2158 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2159 {
2160   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2161   Mat            outA;
2162   PetscErrorCode ierr;
2163   PetscTruth     row_identity,col_identity;
2164 
2165   PetscFunctionBegin;
2166   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2167   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2168   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2169   if (!row_identity || !col_identity) {
2170     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2171   }
2172 
2173   outA          = inA;
2174   inA->factor   = MAT_FACTOR_LU;
2175 
2176   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2177 
2178   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2179   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2180   a->row = row;
2181   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2182   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2183   a->col = col;
2184 
2185   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2186   if (a->icol) {
2187     ierr = ISDestroy(a->icol);CHKERRQ(ierr);
2188   }
2189   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2190   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2191 
2192   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2193   if (!a->solve_work) {
2194     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2195     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2196   }
2197   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2198 
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 EXTERN_C_BEGIN
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2205 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2206 {
2207   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2208   PetscInt    i,nz,mbs;
2209 
2210   PetscFunctionBegin;
2211   nz  = baij->maxnz/baij->bs2;
2212   mbs = baij->mbs;
2213   for (i=0; i<nz; i++) {
2214     baij->j[i] = indices[i];
2215   }
2216   baij->nz = nz;
2217   for (i=0; i<mbs; i++) {
2218     baij->ilen[i] = baij->imax[i];
2219   }
2220   PetscFunctionReturn(0);
2221 }
2222 EXTERN_C_END
2223 
2224 #undef __FUNCT__
2225 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2226 /*@
2227     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2228        in the matrix.
2229 
2230   Input Parameters:
2231 +  mat - the SeqBAIJ matrix
2232 -  indices - the column indices
2233 
2234   Level: advanced
2235 
2236   Notes:
2237     This can be called if you have precomputed the nonzero structure of the
2238   matrix and want to provide it to the matrix object to improve the performance
2239   of the MatSetValues() operation.
2240 
2241     You MUST have set the correct numbers of nonzeros per row in the call to
2242   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2243 
2244     MUST be called before any calls to MatSetValues();
2245 
2246 @*/
2247 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2248 {
2249   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2250 
2251   PetscFunctionBegin;
2252   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2253   PetscValidPointer(indices,2);
2254   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2255   if (f) {
2256     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2257   } else {
2258     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2259   }
2260   PetscFunctionReturn(0);
2261 }
2262 
2263 #undef __FUNCT__
2264 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2265 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2266 {
2267   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2268   PetscErrorCode ierr;
2269   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2270   PetscReal      atmp;
2271   PetscScalar    *x,zero = 0.0;
2272   MatScalar      *aa;
2273   PetscInt       ncols,brow,krow,kcol;
2274 
2275   PetscFunctionBegin;
2276   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2277   bs   = A->rmap->bs;
2278   aa   = a->a;
2279   ai   = a->i;
2280   aj   = a->j;
2281   mbs  = a->mbs;
2282 
2283   ierr = VecSet(v,zero);CHKERRQ(ierr);
2284   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2285   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2286   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2287   for (i=0; i<mbs; i++) {
2288     ncols = ai[1] - ai[0]; ai++;
2289     brow  = bs*i;
2290     for (j=0; j<ncols; j++){
2291       for (kcol=0; kcol<bs; kcol++){
2292         for (krow=0; krow<bs; krow++){
2293           atmp = PetscAbsScalar(*aa);aa++;
2294           row = brow + krow;    /* row index */
2295           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2296           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2297         }
2298       }
2299       aj++;
2300     }
2301   }
2302   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatCopy_SeqBAIJ"
2308 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2309 {
2310   PetscErrorCode ierr;
2311 
2312   PetscFunctionBegin;
2313   /* If the two matrices have the same copy implementation, use fast copy. */
2314   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2315     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2316     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2317 
2318     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2319       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2320     }
2321     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2322   } else {
2323     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2324   }
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2330 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2331 {
2332   PetscErrorCode ierr;
2333 
2334   PetscFunctionBegin;
2335   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2341 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2342 {
2343   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2344   PetscFunctionBegin;
2345   *array = a->a;
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2351 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2352 {
2353   PetscFunctionBegin;
2354   PetscFunctionReturn(0);
2355 }
2356 
2357 #include "petscblaslapack.h"
2358 #undef __FUNCT__
2359 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2360 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2361 {
2362   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2363   PetscErrorCode ierr;
2364   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2365   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2366 
2367   PetscFunctionBegin;
2368   if (str == SAME_NONZERO_PATTERN) {
2369     PetscScalar alpha = a;
2370     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2371   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2372     if (y->xtoy && y->XtoY != X) {
2373       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2374       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2375     }
2376     if (!y->xtoy) { /* get xtoy */
2377       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2378       y->XtoY = X;
2379       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2380     }
2381     bs2 = bs*bs;
2382     for (i=0; i<x->nz; i++) {
2383       j = 0;
2384       while (j < bs2){
2385         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2386         j++;
2387       }
2388     }
2389     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2390   } else {
2391     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2392   }
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatSetBlockSize_SeqBAIJ"
2398 PetscErrorCode MatSetBlockSize_SeqBAIJ(Mat A,PetscInt bs)
2399 {
2400   PetscInt rbs,cbs;
2401   PetscErrorCode ierr;
2402 
2403   PetscFunctionBegin;
2404   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
2405   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
2406   if (rbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,rbs);
2407   if (cbs != bs) SETERRQ2(PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with BAIJ %d",bs,cbs);
2408   PetscFunctionReturn(0);
2409 }
2410 
2411 #undef __FUNCT__
2412 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2413 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2414 {
2415   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2416   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2417   MatScalar      *aa = a->a;
2418 
2419   PetscFunctionBegin;
2420   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2421   PetscFunctionReturn(0);
2422 }
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2426 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2427 {
2428   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2429   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2430   MatScalar      *aa = a->a;
2431 
2432   PetscFunctionBegin;
2433   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2441 /*
2442     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2443 */
2444 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2445 {
2446   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2447   PetscErrorCode ierr;
2448   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2449   PetscInt       nz = a->i[m],row,*jj,mr,col;
2450 
2451   PetscFunctionBegin;
2452   *nn = n;
2453   if (!ia) PetscFunctionReturn(0);
2454   if (symmetric) {
2455     SETERRQ(PETSC_ERR_SUP,"Not for BAIJ matrices");
2456   } else {
2457     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2458     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2459     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2460     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2461     jj = a->j;
2462     for (i=0; i<nz; i++) {
2463       collengths[jj[i]]++;
2464     }
2465     cia[0] = oshift;
2466     for (i=0; i<n; i++) {
2467       cia[i+1] = cia[i] + collengths[i];
2468     }
2469     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2470     jj   = a->j;
2471     for (row=0; row<m; row++) {
2472       mr = a->i[row+1] - a->i[row];
2473       for (i=0; i<mr; i++) {
2474         col = *jj++;
2475         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2476       }
2477     }
2478     ierr = PetscFree(collengths);CHKERRQ(ierr);
2479     *ia = cia; *ja = cja;
2480   }
2481   PetscFunctionReturn(0);
2482 }
2483 
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2486 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth inodecompressed,PetscInt *n,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
2487 {
2488   PetscErrorCode ierr;
2489 
2490   PetscFunctionBegin;
2491   if (!ia) PetscFunctionReturn(0);
2492   ierr = PetscFree(*ia);CHKERRQ(ierr);
2493   ierr = PetscFree(*ja);CHKERRQ(ierr);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2499 PetscErrorCode PETSCMAT_DLLEXPORT MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2500 {
2501   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2502   PetscErrorCode ierr;
2503   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow,m1,m2;
2504   PetscScalar    dx,*y,*xx,*w3_array;
2505   PetscScalar    *vscale_array;
2506   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2507   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2508   void           *fctx = coloring->fctx;
2509   PetscTruth     flg = PETSC_FALSE;
2510   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2511   Vec            x1_tmp;
2512 
2513   PetscFunctionBegin;
2514   PetscValidHeaderSpecific(J,MAT_COOKIE,1);
2515   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_COOKIE,2);
2516   PetscValidHeaderSpecific(x1,VEC_COOKIE,3);
2517   if (!f) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2518 
2519   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2520   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2521   ierr = PetscOptionsGetTruth(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2522   if (flg) {
2523     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2524   } else {
2525     PetscTruth assembled;
2526     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2527     if (assembled) {
2528       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2529     }
2530   }
2531 
2532   x1_tmp = x1;
2533   if (!coloring->vscale){
2534     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2535   }
2536 
2537   /*
2538     This is a horrible, horrible, hack. See DMMGComputeJacobian_Multigrid() it inproperly sets
2539     coloring->F for the coarser grids from the finest
2540   */
2541   if (coloring->F) {
2542     ierr = VecGetLocalSize(coloring->F,&m1);CHKERRQ(ierr);
2543     ierr = VecGetLocalSize(w1,&m2);CHKERRQ(ierr);
2544     if (m1 != m2) {
2545       coloring->F = 0;
2546       }
2547     }
2548 
2549   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2550     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2551   }
2552   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2553 
2554   /* Set w1 = F(x1) */
2555   if (coloring->F) {
2556     w1          = coloring->F; /* use already computed value of function */
2557     coloring->F = 0;
2558   } else {
2559     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2560     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2561     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2562   }
2563 
2564   if (!coloring->w3) {
2565     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2566     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2567   }
2568   w3 = coloring->w3;
2569 
2570     CHKMEMQ;
2571     /* Compute all the local scale factors, including ghost points */
2572   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2573   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2574   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2575   if (ctype == IS_COLORING_GHOSTED){
2576     col_start = 0; col_end = N;
2577   } else if (ctype == IS_COLORING_GLOBAL){
2578     xx = xx - start;
2579     vscale_array = vscale_array - start;
2580     col_start = start; col_end = N + start;
2581   }    CHKMEMQ;
2582   for (col=col_start; col<col_end; col++){
2583     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2584     if (coloring->htype[0] == 'w') {
2585       dx = 1.0 + unorm;
2586     } else {
2587       dx  = xx[col];
2588     }
2589     if (dx == 0.0) dx = 1.0;
2590 #if !defined(PETSC_USE_COMPLEX)
2591     if (dx < umin && dx >= 0.0)      dx = umin;
2592     else if (dx < 0.0 && dx > -umin) dx = -umin;
2593 #else
2594     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2595     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2596 #endif
2597     dx               *= epsilon;
2598     vscale_array[col] = 1.0/dx;
2599   }     CHKMEMQ;
2600   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2601   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2602   if (ctype == IS_COLORING_GLOBAL){
2603     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2604     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2605   }
2606         CHKMEMQ;
2607   if (coloring->vscaleforrow) {
2608     vscaleforrow = coloring->vscaleforrow;
2609   } else {
2610     SETERRQ(PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2611   }
2612 
2613 
2614   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2615   /*
2616     Loop over each color
2617   */
2618   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2619   for (k=0; k<coloring->ncolors; k++) {
2620     coloring->currentcolor = k;
2621     for (i=0; i<bs; i++) {
2622       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2623       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2624       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2625       /*
2626 	Loop over each column associated with color
2627 	adding the perturbation to the vector w3.
2628       */
2629       for (l=0; l<coloring->ncolumns[k]; l++) {
2630 	col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2631 	if (coloring->htype[0] == 'w') {
2632 	  dx = 1.0 + unorm;
2633 	} else {
2634 	  dx  = xx[col];
2635 	}
2636 	if (dx == 0.0) dx = 1.0;
2637 #if !defined(PETSC_USE_COMPLEX)
2638 	if (dx < umin && dx >= 0.0)      dx = umin;
2639 	else if (dx < 0.0 && dx > -umin) dx = -umin;
2640 #else
2641 	if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2642 	else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2643 #endif
2644 	dx            *= epsilon;
2645 	if (!PetscAbsScalar(dx)) SETERRQ(PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2646 	w3_array[col] += dx;
2647       }
2648       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2649       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2650 
2651       /*
2652 	Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2653 	w2 = F(x1 + dx) - F(x1)
2654       */
2655       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2656       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2657       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2658       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2659 
2660       /*
2661 	Loop over rows of vector, putting results into Jacobian matrix
2662       */
2663       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2664       for (l=0; l<coloring->nrows[k]; l++) {
2665 	row    = bs*coloring->rows[k][l];             /* local row index */
2666 	col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2667         for (j=0; j<bs; j++) {
2668   	  y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2669           srows[j]  = row + start + j;
2670         }
2671 	ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2672       }
2673       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2674     }
2675   } /* endof for each color */
2676   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2677   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2678   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2679   ierr = PetscFree(srows);CHKERRQ(ierr);
2680 
2681   coloring->currentcolor = -1;
2682   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2683   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2684   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2685   PetscFunctionReturn(0);
2686 }
2687 
2688 /* -------------------------------------------------------------------*/
2689 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2690        MatGetRow_SeqBAIJ,
2691        MatRestoreRow_SeqBAIJ,
2692        MatMult_SeqBAIJ_N,
2693 /* 4*/ MatMultAdd_SeqBAIJ_N,
2694        MatMultTranspose_SeqBAIJ,
2695        MatMultTransposeAdd_SeqBAIJ,
2696        0,
2697        0,
2698        0,
2699 /*10*/ 0,
2700        MatLUFactor_SeqBAIJ,
2701        0,
2702        0,
2703        MatTranspose_SeqBAIJ,
2704 /*15*/ MatGetInfo_SeqBAIJ,
2705        MatEqual_SeqBAIJ,
2706        MatGetDiagonal_SeqBAIJ,
2707        MatDiagonalScale_SeqBAIJ,
2708        MatNorm_SeqBAIJ,
2709 /*20*/ 0,
2710        MatAssemblyEnd_SeqBAIJ,
2711        MatSetOption_SeqBAIJ,
2712        MatZeroEntries_SeqBAIJ,
2713 /*24*/ MatZeroRows_SeqBAIJ,
2714        0,
2715        0,
2716        0,
2717        0,
2718 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2719        0,
2720        0,
2721        MatGetArray_SeqBAIJ,
2722        MatRestoreArray_SeqBAIJ,
2723 /*34*/ MatDuplicate_SeqBAIJ,
2724        0,
2725        0,
2726        MatILUFactor_SeqBAIJ,
2727        0,
2728 /*39*/ MatAXPY_SeqBAIJ,
2729        MatGetSubMatrices_SeqBAIJ,
2730        MatIncreaseOverlap_SeqBAIJ,
2731        MatGetValues_SeqBAIJ,
2732        MatCopy_SeqBAIJ,
2733 /*44*/ 0,
2734        MatScale_SeqBAIJ,
2735        0,
2736        0,
2737        MatILUDTFactor_SeqBAIJ,
2738 /*49*/ MatSetBlockSize_SeqBAIJ,
2739        MatGetRowIJ_SeqBAIJ,
2740        MatRestoreRowIJ_SeqBAIJ,
2741        MatGetColumnIJ_SeqBAIJ,
2742        MatRestoreColumnIJ_SeqBAIJ,
2743 /*54*/ MatFDColoringCreate_SeqAIJ,
2744        0,
2745        0,
2746        0,
2747        MatSetValuesBlocked_SeqBAIJ,
2748 /*59*/ MatGetSubMatrix_SeqBAIJ,
2749        MatDestroy_SeqBAIJ,
2750        MatView_SeqBAIJ,
2751        0,
2752        0,
2753 /*64*/ 0,
2754        0,
2755        0,
2756        0,
2757        0,
2758 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2759        0,
2760        MatConvert_Basic,
2761        0,
2762        0,
2763 /*74*/ 0,
2764        MatFDColoringApply_BAIJ,
2765        0,
2766        0,
2767        0,
2768 /*79*/ 0,
2769        0,
2770        0,
2771        0,
2772        MatLoad_SeqBAIJ,
2773 /*84*/ 0,
2774        0,
2775        0,
2776        0,
2777        0,
2778 /*89*/ 0,
2779        0,
2780        0,
2781        0,
2782        0,
2783 /*94*/ 0,
2784        0,
2785        0,
2786        0,
2787        0,
2788 /*99*/0,
2789        0,
2790        0,
2791        0,
2792        0,
2793 /*104*/0,
2794        MatRealPart_SeqBAIJ,
2795        MatImaginaryPart_SeqBAIJ,
2796        0,
2797        0,
2798 /*109*/0,
2799        0,
2800        0,
2801        0,
2802        MatMissingDiagonal_SeqBAIJ,
2803 /*114*/0,
2804        0,
2805        0,
2806        0,
2807        0,
2808 /*119*/0,
2809        0,
2810        MatMultHermitianTranspose_SeqBAIJ,
2811        MatMultHermitianTransposeAdd_SeqBAIJ
2812 };
2813 
2814 EXTERN_C_BEGIN
2815 #undef __FUNCT__
2816 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2817 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2818 {
2819   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2820   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2821   PetscErrorCode ierr;
2822 
2823   PetscFunctionBegin;
2824   if (aij->nonew != 1) {
2825     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2826   }
2827 
2828   /* allocate space for values if not already there */
2829   if (!aij->saved_values) {
2830     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2831     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2832   }
2833 
2834   /* copy values over */
2835   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2836   PetscFunctionReturn(0);
2837 }
2838 EXTERN_C_END
2839 
2840 EXTERN_C_BEGIN
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2843 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2844 {
2845   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2846   PetscErrorCode ierr;
2847   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2848 
2849   PetscFunctionBegin;
2850   if (aij->nonew != 1) {
2851     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2852   }
2853   if (!aij->saved_values) {
2854     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2855   }
2856 
2857   /* copy values over */
2858   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 EXTERN_C_END
2862 
2863 EXTERN_C_BEGIN
2864 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2865 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2866 EXTERN_C_END
2867 
2868 EXTERN_C_BEGIN
2869 #undef __FUNCT__
2870 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2871 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2872 {
2873   Mat_SeqBAIJ    *b;
2874   PetscErrorCode ierr;
2875   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2876   PetscTruth     flg,skipallocation = PETSC_FALSE;
2877 
2878   PetscFunctionBegin;
2879 
2880   if (nz == MAT_SKIP_ALLOCATION) {
2881     skipallocation = PETSC_TRUE;
2882     nz             = 0;
2883   }
2884 
2885   if (bs < 0) {
2886     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2887       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2888     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2889     bs   = PetscAbs(bs);
2890   }
2891   if (nnz && newbs != bs) {
2892     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2893   }
2894   bs   = newbs;
2895 
2896   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2897   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2898   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2899   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2900 
2901   B->preallocated = PETSC_TRUE;
2902 
2903   mbs  = B->rmap->n/bs;
2904   nbs  = B->cmap->n/bs;
2905   bs2  = bs*bs;
2906 
2907   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2908     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2909   }
2910 
2911   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2912   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2913   if (nnz) {
2914     for (i=0; i<mbs; i++) {
2915       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2916       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2917     }
2918   }
2919 
2920   b       = (Mat_SeqBAIJ*)B->data;
2921   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2922     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2923   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2924 
2925   if (!flg) {
2926     switch (bs) {
2927     case 1:
2928       B->ops->mult            = MatMult_SeqBAIJ_1;
2929       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2930       B->ops->sor           = MatSOR_SeqBAIJ_1;
2931       break;
2932     case 2:
2933       B->ops->mult            = MatMult_SeqBAIJ_2;
2934       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2935       B->ops->sor           = MatSOR_SeqBAIJ_2;
2936       break;
2937     case 3:
2938       B->ops->mult            = MatMult_SeqBAIJ_3;
2939       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2940       B->ops->sor           = MatSOR_SeqBAIJ_3;
2941       break;
2942     case 4:
2943       B->ops->mult            = MatMult_SeqBAIJ_4;
2944       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2945       B->ops->sor           = MatSOR_SeqBAIJ_4;
2946       break;
2947     case 5:
2948       B->ops->mult            = MatMult_SeqBAIJ_5;
2949       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2950       B->ops->sor           = MatSOR_SeqBAIJ_5;
2951       break;
2952     case 6:
2953       B->ops->mult            = MatMult_SeqBAIJ_6;
2954       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2955       B->ops->sor           = MatSOR_SeqBAIJ_6;
2956       break;
2957     case 7:
2958       B->ops->mult            = MatMult_SeqBAIJ_7;
2959       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2960       B->ops->sor           = MatSOR_SeqBAIJ_7;
2961       break;
2962     default:
2963       B->ops->mult            = MatMult_SeqBAIJ_N;
2964       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2965       break;
2966     }
2967   }
2968   B->rmap->bs      = bs;
2969   b->mbs     = mbs;
2970   b->nbs     = nbs;
2971   if (!skipallocation) {
2972     if (!b->imax) {
2973       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2974       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));
2975       b->free_imax_ilen = PETSC_TRUE;
2976     }
2977     /* b->ilen will count nonzeros in each block row so far. */
2978     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2979     if (!nnz) {
2980       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2981       else if (nz <= 0)        nz = 1;
2982       for (i=0; i<mbs; i++) b->imax[i] = nz;
2983       nz = nz*mbs;
2984     } else {
2985       nz = 0;
2986       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2987     }
2988 
2989     /* allocate the matrix space */
2990     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2991     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2992     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2993     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2994     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2995     b->singlemalloc = PETSC_TRUE;
2996     b->i[0] = 0;
2997     for (i=1; i<mbs+1; i++) {
2998       b->i[i] = b->i[i-1] + b->imax[i-1];
2999     }
3000     b->free_a     = PETSC_TRUE;
3001     b->free_ij    = PETSC_TRUE;
3002   } else {
3003     b->free_a     = PETSC_FALSE;
3004     b->free_ij    = PETSC_FALSE;
3005   }
3006 
3007   B->rmap->bs          = bs;
3008   b->bs2              = bs2;
3009   b->mbs              = mbs;
3010   b->nz               = 0;
3011   b->maxnz            = nz*bs2;
3012   B->info.nz_unneeded = (PetscReal)b->maxnz;
3013   PetscFunctionReturn(0);
3014 }
3015 EXTERN_C_END
3016 
3017 EXTERN_C_BEGIN
3018 #undef __FUNCT__
3019 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3020 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3021 {
3022   PetscInt       i,m,nz,nz_max=0,*nnz;
3023   PetscScalar    *values=0;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027 
3028   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3029 
3030   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3031   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3032   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3033   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3034   m = B->rmap->n/bs;
3035 
3036   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3037   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3038   for(i=0; i<m; i++) {
3039     nz = ii[i+1]- ii[i];
3040     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3041     nz_max = PetscMax(nz_max, nz);
3042     nnz[i] = nz;
3043   }
3044   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3045   ierr = PetscFree(nnz);CHKERRQ(ierr);
3046 
3047   values = (PetscScalar*)V;
3048   if (!values) {
3049     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3050     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3051   }
3052   for (i=0; i<m; i++) {
3053     PetscInt          ncols  = ii[i+1] - ii[i];
3054     const PetscInt    *icols = jj + ii[i];
3055     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3056     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3057   }
3058   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3059   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3060   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3061 
3062   PetscFunctionReturn(0);
3063 }
3064 EXTERN_C_END
3065 
3066 
3067 EXTERN_C_BEGIN
3068 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3069 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3070 EXTERN_C_END
3071 
3072 /*MC
3073    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3074    block sparse compressed row format.
3075 
3076    Options Database Keys:
3077 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3078 
3079   Level: beginner
3080 
3081 .seealso: MatCreateSeqBAIJ()
3082 M*/
3083 
3084 
3085 EXTERN_C_BEGIN
3086 #undef __FUNCT__
3087 #define __FUNCT__ "MatCreate_SeqBAIJ"
3088 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
3089 {
3090   PetscErrorCode ierr;
3091   PetscMPIInt    size;
3092   Mat_SeqBAIJ    *b;
3093 
3094   PetscFunctionBegin;
3095   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3096   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3097 
3098   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3099   B->data = (void*)b;
3100   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3101   B->mapping               = 0;
3102   b->row                   = 0;
3103   b->col                   = 0;
3104   b->icol                  = 0;
3105   b->reallocs              = 0;
3106   b->saved_values          = 0;
3107 
3108   b->roworiented           = PETSC_TRUE;
3109   b->nonew                 = 0;
3110   b->diag                  = 0;
3111   b->solve_work            = 0;
3112   b->mult_work             = 0;
3113   B->spptr                 = 0;
3114   B->info.nz_unneeded      = (PetscReal)b->maxnz;
3115   b->keepnonzeropattern    = PETSC_FALSE;
3116   b->xtoy                  = 0;
3117   b->XtoY                  = 0;
3118   b->compressedrow.use     = PETSC_FALSE;
3119   b->compressedrow.nrows   = 0;
3120   b->compressedrow.i       = PETSC_NULL;
3121   b->compressedrow.rindex  = PETSC_NULL;
3122   b->compressedrow.checked = PETSC_FALSE;
3123   B->same_nonzero          = PETSC_FALSE;
3124 
3125   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3126                                      "MatGetFactorAvailable_seqbaij_petsc",
3127                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3128   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3129                                      "MatGetFactor_seqbaij_petsc",
3130                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3131   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
3132                                      "MatInvertBlockDiagonal_SeqBAIJ",
3133                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3134   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3135                                      "MatStoreValues_SeqBAIJ",
3136                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3137   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3138                                      "MatRetrieveValues_SeqBAIJ",
3139                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3140   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3141                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3142                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3143   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3144                                      "MatConvert_SeqBAIJ_SeqAIJ",
3145                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3146   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3147                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3148                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3149   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3150                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3151                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3152   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3153                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3154                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3155   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3156   PetscFunctionReturn(0);
3157 }
3158 EXTERN_C_END
3159 
3160 #undef __FUNCT__
3161 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3162 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscTruth mallocmatspace)
3163 {
3164   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3165   PetscErrorCode ierr;
3166   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3167 
3168   PetscFunctionBegin;
3169   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
3170 
3171   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3172     c->imax = a->imax;
3173     c->ilen = a->ilen;
3174     c->free_imax_ilen = PETSC_FALSE;
3175   } else {
3176     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3177     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3178     for (i=0; i<mbs; i++) {
3179       c->imax[i] = a->imax[i];
3180       c->ilen[i] = a->ilen[i];
3181     }
3182     c->free_imax_ilen = PETSC_TRUE;
3183   }
3184 
3185   /* allocate the matrix space */
3186   if (mallocmatspace){
3187     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3188       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3189       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3190       c->singlemalloc = PETSC_FALSE;
3191       c->free_ij      = PETSC_FALSE;
3192       c->i            = a->i;
3193       c->j            = a->j;
3194       c->parent       = A;
3195       ierr            = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3196       ierr            = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3197       ierr            = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3198     } else {
3199       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3200       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3201       c->singlemalloc = PETSC_TRUE;
3202       c->free_ij      = PETSC_TRUE;
3203       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3204       if (mbs > 0) {
3205 	ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3206 	if (cpvalues == MAT_COPY_VALUES) {
3207 	  ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3208 	} else {
3209 	  ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3210 	}
3211       }
3212     }
3213   }
3214 
3215   c->roworiented = a->roworiented;
3216   c->nonew       = a->nonew;
3217   ierr = PetscLayoutCopy(A->rmap,&C->rmap);CHKERRQ(ierr);
3218   ierr = PetscLayoutCopy(A->cmap,&C->cmap);CHKERRQ(ierr);
3219   c->bs2         = a->bs2;
3220   c->mbs         = a->mbs;
3221   c->nbs         = a->nbs;
3222 
3223   if (a->diag) {
3224     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3225       c->diag      = a->diag;
3226       c->free_diag = PETSC_FALSE;
3227     } else {
3228       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3229       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3230       for (i=0; i<mbs; i++) {
3231         c->diag[i] = a->diag[i];
3232       }
3233       c->free_diag = PETSC_TRUE;
3234     }
3235   } else c->diag        = 0;
3236   c->nz                 = a->nz;
3237   c->maxnz              = a->maxnz;
3238   c->solve_work         = 0;
3239   c->mult_work          = 0;
3240   c->free_a             = PETSC_TRUE;
3241   c->free_ij            = PETSC_TRUE;
3242   C->preallocated       = PETSC_TRUE;
3243   C->assembled          = PETSC_TRUE;
3244 
3245   c->compressedrow.use     = a->compressedrow.use;
3246   c->compressedrow.nrows   = a->compressedrow.nrows;
3247   c->compressedrow.checked = a->compressedrow.checked;
3248   if ( a->compressedrow.checked && a->compressedrow.use){
3249     i = a->compressedrow.nrows;
3250     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
3251     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3252     c->compressedrow.rindex = c->compressedrow.i + i + 1;
3253     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3254     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3255   } else {
3256     c->compressedrow.use    = PETSC_FALSE;
3257     c->compressedrow.i      = PETSC_NULL;
3258     c->compressedrow.rindex = PETSC_NULL;
3259   }
3260   C->same_nonzero = A->same_nonzero;
3261   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3262   PetscFunctionReturn(0);
3263 }
3264 
3265 #undef __FUNCT__
3266 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3267 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3268 {
3269     PetscErrorCode ierr;
3270 
3271   PetscFunctionBegin;
3272   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3273   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3274   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3275   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);
3276   PetscFunctionReturn(0);
3277 }
3278 
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatLoad_SeqBAIJ"
3281 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
3282 {
3283   Mat_SeqBAIJ    *a;
3284   Mat            B;
3285   PetscErrorCode ierr;
3286   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3287   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3288   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
3289   PetscInt       *masked,nmask,tmp,bs2,ishift;
3290   PetscMPIInt    size;
3291   int            fd;
3292   PetscScalar    *aa;
3293   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3294 
3295   PetscFunctionBegin;
3296   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3297     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3298   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3299   bs2  = bs*bs;
3300 
3301   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3302   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
3303   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3304   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3305   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3306   M = header[1]; N = header[2]; nz = header[3];
3307 
3308   if (header[3] < 0) {
3309     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3310   }
3311 
3312   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
3313 
3314   /*
3315      This code adds extra rows to make sure the number of rows is
3316     divisible by the blocksize
3317   */
3318   mbs        = M/bs;
3319   extra_rows = bs - M + bs*(mbs);
3320   if (extra_rows == bs) extra_rows = 0;
3321   else                  mbs++;
3322   if (extra_rows) {
3323     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3324   }
3325 
3326   /* read in row lengths */
3327   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3328   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3329   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3330 
3331   /* read in column indices */
3332   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3333   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3334   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3335 
3336   /* loop over row lengths determining block row lengths */
3337   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3338   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3339   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3340   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3341   masked   = mask + mbs;
3342   rowcount = 0; nzcount = 0;
3343   for (i=0; i<mbs; i++) {
3344     nmask = 0;
3345     for (j=0; j<bs; j++) {
3346       kmax = rowlengths[rowcount];
3347       for (k=0; k<kmax; k++) {
3348         tmp = jj[nzcount++]/bs;
3349         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3350       }
3351       rowcount++;
3352     }
3353     browlengths[i] += nmask;
3354     /* zero out the mask elements we set */
3355     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3356   }
3357 
3358   /* create our matrix */
3359   ierr = MatCreate(comm,&B);
3360   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3361   ierr = MatSetType(B,type);CHKERRQ(ierr);
3362   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3363   a = (Mat_SeqBAIJ*)B->data;
3364 
3365   /* set matrix "i" values */
3366   a->i[0] = 0;
3367   for (i=1; i<= mbs; i++) {
3368     a->i[i]      = a->i[i-1] + browlengths[i-1];
3369     a->ilen[i-1] = browlengths[i-1];
3370   }
3371   a->nz         = 0;
3372   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3373 
3374   /* read in nonzero values */
3375   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3376   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3377   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3378 
3379   /* set "a" and "j" values into matrix */
3380   nzcount = 0; jcount = 0;
3381   for (i=0; i<mbs; i++) {
3382     nzcountb = nzcount;
3383     nmask    = 0;
3384     for (j=0; j<bs; j++) {
3385       kmax = rowlengths[i*bs+j];
3386       for (k=0; k<kmax; k++) {
3387         tmp = jj[nzcount++]/bs;
3388 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3389       }
3390     }
3391     /* sort the masked values */
3392     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3393 
3394     /* set "j" values into matrix */
3395     maskcount = 1;
3396     for (j=0; j<nmask; j++) {
3397       a->j[jcount++]  = masked[j];
3398       mask[masked[j]] = maskcount++;
3399     }
3400     /* set "a" values into matrix */
3401     ishift = bs2*a->i[i];
3402     for (j=0; j<bs; j++) {
3403       kmax = rowlengths[i*bs+j];
3404       for (k=0; k<kmax; k++) {
3405         tmp       = jj[nzcountb]/bs ;
3406         block     = mask[tmp] - 1;
3407         point     = jj[nzcountb] - bs*tmp;
3408         idx       = ishift + bs2*block + j + bs*point;
3409         a->a[idx] = (MatScalar)aa[nzcountb++];
3410       }
3411     }
3412     /* zero out the mask elements we set */
3413     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3414   }
3415   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3416 
3417   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3418   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3419   ierr = PetscFree(aa);CHKERRQ(ierr);
3420   ierr = PetscFree(jj);CHKERRQ(ierr);
3421   ierr = PetscFree(mask);CHKERRQ(ierr);
3422 
3423   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3424   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3425   ierr = MatView_Private(B);CHKERRQ(ierr);
3426 
3427   *A = B;
3428   PetscFunctionReturn(0);
3429 }
3430 
3431 #undef __FUNCT__
3432 #define __FUNCT__ "MatCreateSeqBAIJ"
3433 /*@C
3434    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3435    compressed row) format.  For good matrix assembly performance the
3436    user should preallocate the matrix storage by setting the parameter nz
3437    (or the array nnz).  By setting these parameters accurately, performance
3438    during matrix assembly can be increased by more than a factor of 50.
3439 
3440    Collective on MPI_Comm
3441 
3442    Input Parameters:
3443 +  comm - MPI communicator, set to PETSC_COMM_SELF
3444 .  bs - size of block
3445 .  m - number of rows
3446 .  n - number of columns
3447 .  nz - number of nonzero blocks  per block row (same for all rows)
3448 -  nnz - array containing the number of nonzero blocks in the various block rows
3449          (possibly different for each block row) or PETSC_NULL
3450 
3451    Output Parameter:
3452 .  A - the matrix
3453 
3454    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3455    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3456    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3457 
3458    Options Database Keys:
3459 .   -mat_no_unroll - uses code that does not unroll the loops in the
3460                      block calculations (much slower)
3461 .    -mat_block_size - size of the blocks to use
3462 
3463    Level: intermediate
3464 
3465    Notes:
3466    The number of rows and columns must be divisible by blocksize.
3467 
3468    If the nnz parameter is given then the nz parameter is ignored
3469 
3470    A nonzero block is any block that as 1 or more nonzeros in it
3471 
3472    The block AIJ format is fully compatible with standard Fortran 77
3473    storage.  That is, the stored row and column indices can begin at
3474    either one (as in Fortran) or zero.  See the users' manual for details.
3475 
3476    Specify the preallocated storage with either nz or nnz (not both).
3477    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3478    allocation.  For additional details, see the users manual chapter on
3479    matrices.
3480 
3481 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3482 @*/
3483 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3484 {
3485   PetscErrorCode ierr;
3486 
3487   PetscFunctionBegin;
3488   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3489   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3490   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3491   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3492   PetscFunctionReturn(0);
3493 }
3494 
3495 #undef __FUNCT__
3496 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3497 /*@C
3498    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3499    per row in the matrix. For good matrix assembly performance the
3500    user should preallocate the matrix storage by setting the parameter nz
3501    (or the array nnz).  By setting these parameters accurately, performance
3502    during matrix assembly can be increased by more than a factor of 50.
3503 
3504    Collective on MPI_Comm
3505 
3506    Input Parameters:
3507 +  A - the matrix
3508 .  bs - size of block
3509 .  nz - number of block nonzeros per block row (same for all rows)
3510 -  nnz - array containing the number of block nonzeros in the various block rows
3511          (possibly different for each block row) or PETSC_NULL
3512 
3513    Options Database Keys:
3514 .   -mat_no_unroll - uses code that does not unroll the loops in the
3515                      block calculations (much slower)
3516 .    -mat_block_size - size of the blocks to use
3517 
3518    Level: intermediate
3519 
3520    Notes:
3521    If the nnz parameter is given then the nz parameter is ignored
3522 
3523    You can call MatGetInfo() to get information on how effective the preallocation was;
3524    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3525    You can also run with the option -info and look for messages with the string
3526    malloc in them to see if additional memory allocation was needed.
3527 
3528    The block AIJ format is fully compatible with standard Fortran 77
3529    storage.  That is, the stored row and column indices can begin at
3530    either one (as in Fortran) or zero.  See the users' manual for details.
3531 
3532    Specify the preallocated storage with either nz or nnz (not both).
3533    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3534    allocation.  For additional details, see the users manual chapter on
3535    matrices.
3536 
3537 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3538 @*/
3539 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3540 {
3541   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3542 
3543   PetscFunctionBegin;
3544   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3545   if (f) {
3546     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3547   }
3548   PetscFunctionReturn(0);
3549 }
3550 
3551 #undef __FUNCT__
3552 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3553 /*@C
3554    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3555    (the default sequential PETSc format).
3556 
3557    Collective on MPI_Comm
3558 
3559    Input Parameters:
3560 +  A - the matrix
3561 .  i - the indices into j for the start of each local row (starts with zero)
3562 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3563 -  v - optional values in the matrix
3564 
3565    Level: developer
3566 
3567 .keywords: matrix, aij, compressed row, sparse
3568 
3569 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3570 @*/
3571 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3572 {
3573   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3574 
3575   PetscFunctionBegin;
3576   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3577   if (f) {
3578     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3579   }
3580   PetscFunctionReturn(0);
3581 }
3582 
3583 
3584 #undef __FUNCT__
3585 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3586 /*@
3587      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3588               (upper triangular entries in CSR format) provided by the user.
3589 
3590      Collective on MPI_Comm
3591 
3592    Input Parameters:
3593 +  comm - must be an MPI communicator of size 1
3594 .  bs - size of block
3595 .  m - number of rows
3596 .  n - number of columns
3597 .  i - row indices
3598 .  j - column indices
3599 -  a - matrix values
3600 
3601    Output Parameter:
3602 .  mat - the matrix
3603 
3604    Level: intermediate
3605 
3606    Notes:
3607        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3608     once the matrix is destroyed
3609 
3610        You cannot set new nonzero locations into this matrix, that will generate an error.
3611 
3612        The i and j indices are 0 based
3613 
3614 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3615 
3616 @*/
3617 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3618 {
3619   PetscErrorCode ierr;
3620   PetscInt       ii;
3621   Mat_SeqBAIJ    *baij;
3622 
3623   PetscFunctionBegin;
3624   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3625   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3626 
3627   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3628   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3629   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3630   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3631   baij = (Mat_SeqBAIJ*)(*mat)->data;
3632   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3633   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3634 
3635   baij->i = i;
3636   baij->j = j;
3637   baij->a = a;
3638   baij->singlemalloc = PETSC_FALSE;
3639   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3640   baij->free_a       = PETSC_FALSE;
3641   baij->free_ij       = PETSC_FALSE;
3642 
3643   for (ii=0; ii<m; ii++) {
3644     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3645 #if defined(PETSC_USE_DEBUG)
3646     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3647 #endif
3648   }
3649 #if defined(PETSC_USE_DEBUG)
3650   for (ii=0; ii<baij->i[m]; ii++) {
3651     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3652     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3653   }
3654 #endif
3655 
3656   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3657   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3658   PetscFunctionReturn(0);
3659 }
3660