xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fd5f9c01ffc3be76f9ceed6e73f05a0f273fc855)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap.bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   ierr = PetscFree(a);CHKERRQ(ierr);
1264 
1265   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1266   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1279 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1280 {
1281   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1282   PetscErrorCode ierr;
1283 
1284   PetscFunctionBegin;
1285   switch (op) {
1286   case MAT_ROW_ORIENTED:
1287     a->roworiented    = flg;
1288     break;
1289   case MAT_KEEP_ZEROED_ROWS:
1290     a->keepzeroedrows = flg;
1291     break;
1292   case MAT_NEW_NONZERO_LOCATIONS:
1293     a->nonew          = (flg ? 0 : 1);
1294     break;
1295   case MAT_NEW_NONZERO_LOCATION_ERR:
1296     a->nonew          = (flg ? -1 : 0);
1297     break;
1298   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1299     a->nonew          = (flg ? -2 : 0);
1300     break;
1301   case MAT_NEW_DIAGONALS:
1302   case MAT_IGNORE_OFF_PROC_ENTRIES:
1303   case MAT_USE_HASH_TABLE:
1304     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1305     break;
1306   case MAT_SYMMETRIC:
1307   case MAT_STRUCTURALLY_SYMMETRIC:
1308   case MAT_HERMITIAN:
1309   case MAT_SYMMETRY_ETERNAL:
1310     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1311     break;
1312   default:
1313     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1314   }
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1321 {
1322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1325   MatScalar      *aa,*aa_i;
1326   PetscScalar    *v_i;
1327 
1328   PetscFunctionBegin;
1329   bs  = A->rmap.bs;
1330   ai  = a->i;
1331   aj  = a->j;
1332   aa  = a->a;
1333   bs2 = a->bs2;
1334 
1335   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1336 
1337   bn  = row/bs;   /* Block number */
1338   bp  = row % bs; /* Block Position */
1339   M   = ai[bn+1] - ai[bn];
1340   *nz = bs*M;
1341 
1342   if (v) {
1343     *v = 0;
1344     if (*nz) {
1345       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1346       for (i=0; i<M; i++) { /* for each block in the block row */
1347         v_i  = *v + i*bs;
1348         aa_i = aa + bs2*(ai[bn] + i);
1349         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1350       }
1351     }
1352   }
1353 
1354   if (idx) {
1355     *idx = 0;
1356     if (*nz) {
1357       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1358       for (i=0; i<M; i++) { /* for each block in the block row */
1359         idx_i = *idx + i*bs;
1360         itmp  = bs*aj[ai[bn] + i];
1361         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1362       }
1363     }
1364   }
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1370 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1371 {
1372   PetscErrorCode ierr;
1373 
1374   PetscFunctionBegin;
1375   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1376   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1382 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1383 {
1384   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1385   Mat            C;
1386   PetscErrorCode ierr;
1387   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1388   PetscInt       *rows,*cols,bs2=a->bs2;
1389   MatScalar      *array;
1390 
1391   PetscFunctionBegin;
1392   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1393   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1394     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1395     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1396 
1397     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1398     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1399     ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1400     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1401     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1402     ierr = PetscFree(col);CHKERRQ(ierr);
1403   } else {
1404     C = *B;
1405   }
1406 
1407   array = a->a;
1408   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1409   cols = rows + bs;
1410   for (i=0; i<mbs; i++) {
1411     cols[0] = i*bs;
1412     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1413     len = ai[i+1] - ai[i];
1414     for (j=0; j<len; j++) {
1415       rows[0] = (*aj++)*bs;
1416       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1417       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1418       array += bs2;
1419     }
1420   }
1421   ierr = PetscFree(rows);CHKERRQ(ierr);
1422 
1423   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1424   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425 
1426   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1427     *B = C;
1428   } else {
1429     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1430   }
1431   PetscFunctionReturn(0);
1432 }
1433 
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1436 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1437 {
1438   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1439   PetscErrorCode ierr;
1440   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1441   int            fd;
1442   PetscScalar    *aa;
1443   FILE           *file;
1444 
1445   PetscFunctionBegin;
1446   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1447   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1448   col_lens[0] = MAT_FILE_COOKIE;
1449 
1450   col_lens[1] = A->rmap.N;
1451   col_lens[2] = A->cmap.n;
1452   col_lens[3] = a->nz*bs2;
1453 
1454   /* store lengths of each row and write (including header) to file */
1455   count = 0;
1456   for (i=0; i<a->mbs; i++) {
1457     for (j=0; j<bs; j++) {
1458       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1459     }
1460   }
1461   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1462   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1463 
1464   /* store column indices (zero start index) */
1465   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1466   count = 0;
1467   for (i=0; i<a->mbs; i++) {
1468     for (j=0; j<bs; j++) {
1469       for (k=a->i[i]; k<a->i[i+1]; k++) {
1470         for (l=0; l<bs; l++) {
1471           jj[count++] = bs*a->j[k] + l;
1472         }
1473       }
1474     }
1475   }
1476   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1477   ierr = PetscFree(jj);CHKERRQ(ierr);
1478 
1479   /* store nonzero values */
1480   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1481   count = 0;
1482   for (i=0; i<a->mbs; i++) {
1483     for (j=0; j<bs; j++) {
1484       for (k=a->i[i]; k<a->i[i+1]; k++) {
1485         for (l=0; l<bs; l++) {
1486           aa[count++] = a->a[bs2*k + l*bs + j];
1487         }
1488       }
1489     }
1490   }
1491   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1492   ierr = PetscFree(aa);CHKERRQ(ierr);
1493 
1494   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1495   if (file) {
1496     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1497   }
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 #undef __FUNCT__
1502 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1503 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1504 {
1505   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1506   PetscErrorCode    ierr;
1507   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1508   PetscViewerFormat format;
1509 
1510   PetscFunctionBegin;
1511   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1512   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1513     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1514   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1515     Mat aij;
1516     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1517     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1518     ierr = MatDestroy(aij);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1520      PetscFunctionReturn(0);
1521   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1522     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1523     for (i=0; i<a->mbs; i++) {
1524       for (j=0; j<bs; j++) {
1525         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1526         for (k=a->i[i]; k<a->i[i+1]; k++) {
1527           for (l=0; l<bs; l++) {
1528 #if defined(PETSC_USE_COMPLEX)
1529             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1531                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1532             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1534                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             }
1538 #else
1539             if (a->a[bs2*k + l*bs + j] != 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1541             }
1542 #endif
1543           }
1544         }
1545         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1546       }
1547     }
1548     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1549   } else {
1550     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1551     for (i=0; i<a->mbs; i++) {
1552       for (j=0; j<bs; j++) {
1553         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1554         for (k=a->i[i]; k<a->i[i+1]; k++) {
1555           for (l=0; l<bs; l++) {
1556 #if defined(PETSC_USE_COMPLEX)
1557             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1558               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1559                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1560             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1562                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             } else {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             }
1566 #else
1567             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1568 #endif
1569           }
1570         }
1571         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1572       }
1573     }
1574     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1575   }
1576   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #undef __FUNCT__
1581 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1582 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1583 {
1584   Mat            A = (Mat) Aa;
1585   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1586   PetscErrorCode ierr;
1587   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1588   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1589   MatScalar      *aa;
1590   PetscViewer    viewer;
1591 
1592   PetscFunctionBegin;
1593 
1594   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1595   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1596 
1597   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1598 
1599   /* loop over matrix elements drawing boxes */
1600   color = PETSC_DRAW_BLUE;
1601   for (i=0,row=0; i<mbs; i++,row+=bs) {
1602     for (j=a->i[i]; j<a->i[i+1]; j++) {
1603       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1604       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1605       aa = a->a + j*bs2;
1606       for (k=0; k<bs; k++) {
1607         for (l=0; l<bs; l++) {
1608           if (PetscRealPart(*aa++) >=  0.) continue;
1609           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1610         }
1611       }
1612     }
1613   }
1614   color = PETSC_DRAW_CYAN;
1615   for (i=0,row=0; i<mbs; i++,row+=bs) {
1616     for (j=a->i[i]; j<a->i[i+1]; j++) {
1617       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1618       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1619       aa = a->a + j*bs2;
1620       for (k=0; k<bs; k++) {
1621         for (l=0; l<bs; l++) {
1622           if (PetscRealPart(*aa++) != 0.) continue;
1623           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1624         }
1625       }
1626     }
1627   }
1628 
1629   color = PETSC_DRAW_RED;
1630   for (i=0,row=0; i<mbs; i++,row+=bs) {
1631     for (j=a->i[i]; j<a->i[i+1]; j++) {
1632       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1633       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1634       aa = a->a + j*bs2;
1635       for (k=0; k<bs; k++) {
1636         for (l=0; l<bs; l++) {
1637           if (PetscRealPart(*aa++) <= 0.) continue;
1638           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1639         }
1640       }
1641     }
1642   }
1643   PetscFunctionReturn(0);
1644 }
1645 
1646 #undef __FUNCT__
1647 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1648 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1649 {
1650   PetscErrorCode ierr;
1651   PetscReal      xl,yl,xr,yr,w,h;
1652   PetscDraw      draw;
1653   PetscTruth     isnull;
1654 
1655   PetscFunctionBegin;
1656 
1657   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1658   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1659 
1660   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1661   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1662   xr += w;    yr += h;  xl = -w;     yl = -h;
1663   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1664   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1665   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 #undef __FUNCT__
1670 #define __FUNCT__ "MatView_SeqBAIJ"
1671 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1672 {
1673   PetscErrorCode ierr;
1674   PetscTruth     iascii,isbinary,isdraw;
1675 
1676   PetscFunctionBegin;
1677   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1678   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1680   if (iascii){
1681     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1682   } else if (isbinary) {
1683     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1684   } else if (isdraw) {
1685     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1686   } else {
1687     Mat B;
1688     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1689     ierr = MatView(B,viewer);CHKERRQ(ierr);
1690     ierr = MatDestroy(B);CHKERRQ(ierr);
1691   }
1692   PetscFunctionReturn(0);
1693 }
1694 
1695 
1696 #undef __FUNCT__
1697 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1698 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1699 {
1700   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1701   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1702   PetscInt    *ai = a->i,*ailen = a->ilen;
1703   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1704   MatScalar   *ap,*aa = a->a;
1705 
1706   PetscFunctionBegin;
1707   for (k=0; k<m; k++) { /* loop over rows */
1708     row  = im[k]; brow = row/bs;
1709     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1710     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1711     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1712     nrow = ailen[brow];
1713     for (l=0; l<n; l++) { /* loop over columns */
1714       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1715       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1716       col  = in[l] ;
1717       bcol = col/bs;
1718       cidx = col%bs;
1719       ridx = row%bs;
1720       high = nrow;
1721       low  = 0; /* assume unsorted */
1722       while (high-low > 5) {
1723         t = (low+high)/2;
1724         if (rp[t] > bcol) high = t;
1725         else             low  = t;
1726       }
1727       for (i=low; i<high; i++) {
1728         if (rp[i] > bcol) break;
1729         if (rp[i] == bcol) {
1730           *v++ = ap[bs2*i+bs*cidx+ridx];
1731           goto finished;
1732         }
1733       }
1734       *v++ = 0.0;
1735       finished:;
1736     }
1737   }
1738   PetscFunctionReturn(0);
1739 }
1740 
1741 #define CHUNKSIZE 10
1742 #undef __FUNCT__
1743 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1744 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1745 {
1746   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1747   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1748   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1749   PetscErrorCode    ierr;
1750   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1751   PetscTruth        roworiented=a->roworiented;
1752   const PetscScalar *value = v;
1753   MatScalar         *ap,*aa = a->a,*bap;
1754 
1755   PetscFunctionBegin;
1756   if (roworiented) {
1757     stepval = (n-1)*bs;
1758   } else {
1759     stepval = (m-1)*bs;
1760   }
1761   for (k=0; k<m; k++) { /* loop over added rows */
1762     row  = im[k];
1763     if (row < 0) continue;
1764 #if defined(PETSC_USE_DEBUG)
1765     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1766 #endif
1767     rp   = aj + ai[row];
1768     ap   = aa + bs2*ai[row];
1769     rmax = imax[row];
1770     nrow = ailen[row];
1771     low  = 0;
1772     high = nrow;
1773     for (l=0; l<n; l++) { /* loop over added columns */
1774       if (in[l] < 0) continue;
1775 #if defined(PETSC_USE_DEBUG)
1776       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1777 #endif
1778       col = in[l];
1779       if (roworiented) {
1780         value = v + k*(stepval+bs)*bs + l*bs;
1781       } else {
1782         value = v + l*(stepval+bs)*bs + k*bs;
1783       }
1784       if (col <= lastcol) low = 0; else high = nrow;
1785       lastcol = col;
1786       while (high-low > 7) {
1787         t = (low+high)/2;
1788         if (rp[t] > col) high = t;
1789         else             low  = t;
1790       }
1791       for (i=low; i<high; i++) {
1792         if (rp[i] > col) break;
1793         if (rp[i] == col) {
1794           bap  = ap +  bs2*i;
1795           if (roworiented) {
1796             if (is == ADD_VALUES) {
1797               for (ii=0; ii<bs; ii++,value+=stepval) {
1798                 for (jj=ii; jj<bs2; jj+=bs) {
1799                   bap[jj] += *value++;
1800                 }
1801               }
1802             } else {
1803               for (ii=0; ii<bs; ii++,value+=stepval) {
1804                 for (jj=ii; jj<bs2; jj+=bs) {
1805                   bap[jj] = *value++;
1806                 }
1807               }
1808             }
1809           } else {
1810             if (is == ADD_VALUES) {
1811               for (ii=0; ii<bs; ii++,value+=stepval) {
1812                 for (jj=0; jj<bs; jj++) {
1813                   *bap++ += *value++;
1814                 }
1815               }
1816             } else {
1817               for (ii=0; ii<bs; ii++,value+=stepval) {
1818                 for (jj=0; jj<bs; jj++) {
1819                   *bap++  = *value++;
1820                 }
1821               }
1822             }
1823           }
1824           goto noinsert2;
1825         }
1826       }
1827       if (nonew == 1) goto noinsert2;
1828       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1829       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1830       N = nrow++ - 1; high++;
1831       /* shift up all the later entries in this row */
1832       for (ii=N; ii>=i; ii--) {
1833         rp[ii+1] = rp[ii];
1834         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1835       }
1836       if (N >= i) {
1837         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1838       }
1839       rp[i] = col;
1840       bap   = ap +  bs2*i;
1841       if (roworiented) {
1842         for (ii=0; ii<bs; ii++,value+=stepval) {
1843           for (jj=ii; jj<bs2; jj+=bs) {
1844             bap[jj] = *value++;
1845           }
1846         }
1847       } else {
1848         for (ii=0; ii<bs; ii++,value+=stepval) {
1849           for (jj=0; jj<bs; jj++) {
1850             *bap++  = *value++;
1851           }
1852         }
1853       }
1854       noinsert2:;
1855       low = i;
1856     }
1857     ailen[row] = nrow;
1858   }
1859   PetscFunctionReturn(0);
1860 }
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1864 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1865 {
1866   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1867   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1868   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1869   PetscErrorCode ierr;
1870   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1871   MatScalar      *aa = a->a,*ap;
1872   PetscReal      ratio=0.6;
1873 
1874   PetscFunctionBegin;
1875   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1876 
1877   if (m) rmax = ailen[0];
1878   for (i=1; i<mbs; i++) {
1879     /* move each row back by the amount of empty slots (fshift) before it*/
1880     fshift += imax[i-1] - ailen[i-1];
1881     rmax   = PetscMax(rmax,ailen[i]);
1882     if (fshift) {
1883       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1884       N = ailen[i];
1885       for (j=0; j<N; j++) {
1886         ip[j-fshift] = ip[j];
1887         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1888       }
1889     }
1890     ai[i] = ai[i-1] + ailen[i-1];
1891   }
1892   if (mbs) {
1893     fshift += imax[mbs-1] - ailen[mbs-1];
1894     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1895   }
1896   /* reset ilen and imax for each row */
1897   for (i=0; i<mbs; i++) {
1898     ailen[i] = imax[i] = ai[i+1] - ai[i];
1899   }
1900   a->nz = ai[mbs];
1901 
1902   /* diagonals may have moved, so kill the diagonal pointers */
1903   a->idiagvalid = PETSC_FALSE;
1904   if (fshift && a->diag) {
1905     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1906     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1907     a->diag = 0;
1908   }
1909   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1910   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1911   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1912   a->reallocs          = 0;
1913   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1914 
1915   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1916   if (a->compressedrow.use){
1917     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1918   }
1919 
1920   A->same_nonzero = PETSC_TRUE;
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 /*
1925    This function returns an array of flags which indicate the locations of contiguous
1926    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1927    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1928    Assume: sizes should be long enough to hold all the values.
1929 */
1930 #undef __FUNCT__
1931 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1932 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1933 {
1934   PetscInt   i,j,k,row;
1935   PetscTruth flg;
1936 
1937   PetscFunctionBegin;
1938   for (i=0,j=0; i<n; j++) {
1939     row = idx[i];
1940     if (row%bs!=0) { /* Not the begining of a block */
1941       sizes[j] = 1;
1942       i++;
1943     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1944       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1945       i++;
1946     } else { /* Begining of the block, so check if the complete block exists */
1947       flg = PETSC_TRUE;
1948       for (k=1; k<bs; k++) {
1949         if (row+k != idx[i+k]) { /* break in the block */
1950           flg = PETSC_FALSE;
1951           break;
1952         }
1953       }
1954       if (flg) { /* No break in the bs */
1955         sizes[j] = bs;
1956         i+= bs;
1957       } else {
1958         sizes[j] = 1;
1959         i++;
1960       }
1961     }
1962   }
1963   *bs_max = j;
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1969 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1970 {
1971   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1972   PetscErrorCode ierr;
1973   PetscInt       i,j,k,count,*rows;
1974   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1975   PetscScalar    zero = 0.0;
1976   MatScalar      *aa;
1977 
1978   PetscFunctionBegin;
1979   /* Make a copy of the IS and  sort it */
1980   /* allocate memory for rows,sizes */
1981   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1982   sizes = rows + is_n;
1983 
1984   /* copy IS values to rows, and sort them */
1985   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1986   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1987   if (baij->keepzeroedrows) {
1988     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1989     bs_max = is_n;
1990     A->same_nonzero = PETSC_TRUE;
1991   } else {
1992     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1993     A->same_nonzero = PETSC_FALSE;
1994   }
1995 
1996   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1997     row   = rows[j];
1998     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1999     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2000     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2001     if (sizes[i] == bs && !baij->keepzeroedrows) {
2002       if (diag != 0.0) {
2003         if (baij->ilen[row/bs] > 0) {
2004           baij->ilen[row/bs]       = 1;
2005           baij->j[baij->i[row/bs]] = row/bs;
2006           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2007         }
2008         /* Now insert all the diagonal values for this bs */
2009         for (k=0; k<bs; k++) {
2010           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2011         }
2012       } else { /* (diag == 0.0) */
2013         baij->ilen[row/bs] = 0;
2014       } /* end (diag == 0.0) */
2015     } else { /* (sizes[i] != bs) */
2016 #if defined (PETSC_USE_DEBUG)
2017       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2018 #endif
2019       for (k=0; k<count; k++) {
2020         aa[0] =  zero;
2021         aa    += bs;
2022       }
2023       if (diag != 0.0) {
2024         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2025       }
2026     }
2027   }
2028 
2029   ierr = PetscFree(rows);CHKERRQ(ierr);
2030   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2031   PetscFunctionReturn(0);
2032 }
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2036 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2037 {
2038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2039   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2040   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2041   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2042   PetscErrorCode ierr;
2043   PetscInt       ridx,cidx,bs2=a->bs2;
2044   PetscTruth     roworiented=a->roworiented;
2045   MatScalar      *ap,value,*aa=a->a,*bap;
2046 
2047   PetscFunctionBegin;
2048   for (k=0; k<m; k++) { /* loop over added rows */
2049     row  = im[k];
2050     brow = row/bs;
2051     if (row < 0) continue;
2052 #if defined(PETSC_USE_DEBUG)
2053     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2054 #endif
2055     rp   = aj + ai[brow];
2056     ap   = aa + bs2*ai[brow];
2057     rmax = imax[brow];
2058     nrow = ailen[brow];
2059     low  = 0;
2060     high = nrow;
2061     for (l=0; l<n; l++) { /* loop over added columns */
2062       if (in[l] < 0) continue;
2063 #if defined(PETSC_USE_DEBUG)
2064       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2065 #endif
2066       col = in[l]; bcol = col/bs;
2067       ridx = row % bs; cidx = col % bs;
2068       if (roworiented) {
2069         value = v[l + k*n];
2070       } else {
2071         value = v[k + l*m];
2072       }
2073       if (col <= lastcol) low = 0; else high = nrow;
2074       lastcol = col;
2075       while (high-low > 7) {
2076         t = (low+high)/2;
2077         if (rp[t] > bcol) high = t;
2078         else              low  = t;
2079       }
2080       for (i=low; i<high; i++) {
2081         if (rp[i] > bcol) break;
2082         if (rp[i] == bcol) {
2083           bap  = ap +  bs2*i + bs*cidx + ridx;
2084           if (is == ADD_VALUES) *bap += value;
2085           else                  *bap  = value;
2086           goto noinsert1;
2087         }
2088       }
2089       if (nonew == 1) goto noinsert1;
2090       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2091       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2092       N = nrow++ - 1; high++;
2093       /* shift up all the later entries in this row */
2094       for (ii=N; ii>=i; ii--) {
2095         rp[ii+1] = rp[ii];
2096         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2097       }
2098       if (N>=i) {
2099         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2100       }
2101       rp[i]                      = bcol;
2102       ap[bs2*i + bs*cidx + ridx] = value;
2103       a->nz++;
2104       noinsert1:;
2105       low = i;
2106     }
2107     ailen[brow] = nrow;
2108   }
2109   A->same_nonzero = PETSC_FALSE;
2110   PetscFunctionReturn(0);
2111 }
2112 
2113 
2114 #undef __FUNCT__
2115 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2116 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2117 {
2118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2119   Mat            outA;
2120   PetscErrorCode ierr;
2121   PetscTruth     row_identity,col_identity;
2122 
2123   PetscFunctionBegin;
2124   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2125   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2126   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2127   if (!row_identity || !col_identity) {
2128     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2129   }
2130 
2131   outA          = inA;
2132   inA->factor   = MAT_FACTOR_LU;
2133 
2134   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2135 
2136   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2137   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2138   a->row = row;
2139   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2140   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2141   a->col = col;
2142 
2143   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2144   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2146 
2147   /*
2148       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2149       for ILU(0) factorization with natural ordering
2150   */
2151   if (inA->rmap.bs < 8) {
2152     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2153   } else {
2154     if (!a->solve_work) {
2155       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2156       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2157     }
2158   }
2159 
2160   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2161 
2162   PetscFunctionReturn(0);
2163 }
2164 
2165 EXTERN_C_BEGIN
2166 #undef __FUNCT__
2167 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2168 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2169 {
2170   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2171   PetscInt    i,nz,nbs;
2172 
2173   PetscFunctionBegin;
2174   nz  = baij->maxnz/baij->bs2;
2175   nbs = baij->nbs;
2176   for (i=0; i<nz; i++) {
2177     baij->j[i] = indices[i];
2178   }
2179   baij->nz = nz;
2180   for (i=0; i<nbs; i++) {
2181     baij->ilen[i] = baij->imax[i];
2182   }
2183 
2184   PetscFunctionReturn(0);
2185 }
2186 EXTERN_C_END
2187 
2188 #undef __FUNCT__
2189 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2190 /*@
2191     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2192        in the matrix.
2193 
2194   Input Parameters:
2195 +  mat - the SeqBAIJ matrix
2196 -  indices - the column indices
2197 
2198   Level: advanced
2199 
2200   Notes:
2201     This can be called if you have precomputed the nonzero structure of the
2202   matrix and want to provide it to the matrix object to improve the performance
2203   of the MatSetValues() operation.
2204 
2205     You MUST have set the correct numbers of nonzeros per row in the call to
2206   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2207 
2208     MUST be called before any calls to MatSetValues();
2209 
2210 @*/
2211 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2212 {
2213   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2214 
2215   PetscFunctionBegin;
2216   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2217   PetscValidPointer(indices,2);
2218   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2219   if (f) {
2220     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2221   } else {
2222     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2223   }
2224   PetscFunctionReturn(0);
2225 }
2226 
2227 #undef __FUNCT__
2228 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2229 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2230 {
2231   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2232   PetscErrorCode ierr;
2233   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2234   PetscReal      atmp;
2235   PetscScalar    *x,zero = 0.0;
2236   MatScalar      *aa;
2237   PetscInt       ncols,brow,krow,kcol;
2238 
2239   PetscFunctionBegin;
2240   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2241   bs   = A->rmap.bs;
2242   aa   = a->a;
2243   ai   = a->i;
2244   aj   = a->j;
2245   mbs  = a->mbs;
2246 
2247   ierr = VecSet(v,zero);CHKERRQ(ierr);
2248   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2249   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2250   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2251   for (i=0; i<mbs; i++) {
2252     ncols = ai[1] - ai[0]; ai++;
2253     brow  = bs*i;
2254     for (j=0; j<ncols; j++){
2255       for (kcol=0; kcol<bs; kcol++){
2256         for (krow=0; krow<bs; krow++){
2257           atmp = PetscAbsScalar(*aa);aa++;
2258           row = brow + krow;    /* row index */
2259           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2260           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2261         }
2262       }
2263       aj++;
2264     }
2265   }
2266   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2267   PetscFunctionReturn(0);
2268 }
2269 
2270 #undef __FUNCT__
2271 #define __FUNCT__ "MatCopy_SeqBAIJ"
2272 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2273 {
2274   PetscErrorCode ierr;
2275 
2276   PetscFunctionBegin;
2277   /* If the two matrices have the same copy implementation, use fast copy. */
2278   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2279     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2280     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2281 
2282     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2283       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2284     }
2285     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2286   } else {
2287     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2288   }
2289   PetscFunctionReturn(0);
2290 }
2291 
2292 #undef __FUNCT__
2293 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2294 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2295 {
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2300   PetscFunctionReturn(0);
2301 }
2302 
2303 #undef __FUNCT__
2304 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2305 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2306 {
2307   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2308   PetscFunctionBegin;
2309   *array = a->a;
2310   PetscFunctionReturn(0);
2311 }
2312 
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2315 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2316 {
2317   PetscFunctionBegin;
2318   PetscFunctionReturn(0);
2319 }
2320 
2321 #include "petscblaslapack.h"
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2324 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2325 {
2326   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2327   PetscErrorCode ierr;
2328   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2329   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2330 
2331   PetscFunctionBegin;
2332   if (str == SAME_NONZERO_PATTERN) {
2333     PetscScalar alpha = a;
2334     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2335   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2336     if (y->xtoy && y->XtoY != X) {
2337       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2338       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2339     }
2340     if (!y->xtoy) { /* get xtoy */
2341       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2342       y->XtoY = X;
2343     }
2344     bs2 = bs*bs;
2345     for (i=0; i<x->nz; i++) {
2346       j = 0;
2347       while (j < bs2){
2348         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2349         j++;
2350       }
2351     }
2352     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2353   } else {
2354     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2355   }
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2361 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2362 {
2363   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2364   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2365   MatScalar      *aa = a->a;
2366 
2367   PetscFunctionBegin;
2368   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2374 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2375 {
2376   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2377   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2378   MatScalar      *aa = a->a;
2379 
2380   PetscFunctionBegin;
2381   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 
2386 /* -------------------------------------------------------------------*/
2387 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2388        MatGetRow_SeqBAIJ,
2389        MatRestoreRow_SeqBAIJ,
2390        MatMult_SeqBAIJ_N,
2391 /* 4*/ MatMultAdd_SeqBAIJ_N,
2392        MatMultTranspose_SeqBAIJ,
2393        MatMultTransposeAdd_SeqBAIJ,
2394        MatSolve_SeqBAIJ_N,
2395        0,
2396        0,
2397 /*10*/ 0,
2398        MatLUFactor_SeqBAIJ,
2399        0,
2400        0,
2401        MatTranspose_SeqBAIJ,
2402 /*15*/ MatGetInfo_SeqBAIJ,
2403        MatEqual_SeqBAIJ,
2404        MatGetDiagonal_SeqBAIJ,
2405        MatDiagonalScale_SeqBAIJ,
2406        MatNorm_SeqBAIJ,
2407 /*20*/ 0,
2408        MatAssemblyEnd_SeqBAIJ,
2409        0,
2410        MatSetOption_SeqBAIJ,
2411        MatZeroEntries_SeqBAIJ,
2412 /*25*/ MatZeroRows_SeqBAIJ,
2413        MatLUFactorSymbolic_SeqBAIJ,
2414        MatLUFactorNumeric_SeqBAIJ_N,
2415        MatCholeskyFactorSymbolic_SeqBAIJ,
2416        MatCholeskyFactorNumeric_SeqBAIJ_N,
2417 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2418        MatILUFactorSymbolic_SeqBAIJ,
2419        MatICCFactorSymbolic_SeqBAIJ,
2420        MatGetArray_SeqBAIJ,
2421        MatRestoreArray_SeqBAIJ,
2422 /*35*/ MatDuplicate_SeqBAIJ,
2423        0,
2424        0,
2425        MatILUFactor_SeqBAIJ,
2426        0,
2427 /*40*/ MatAXPY_SeqBAIJ,
2428        MatGetSubMatrices_SeqBAIJ,
2429        MatIncreaseOverlap_SeqBAIJ,
2430        MatGetValues_SeqBAIJ,
2431        MatCopy_SeqBAIJ,
2432 /*45*/ 0,
2433        MatScale_SeqBAIJ,
2434        0,
2435        0,
2436        0,
2437 /*50*/ 0,
2438        MatGetRowIJ_SeqBAIJ,
2439        MatRestoreRowIJ_SeqBAIJ,
2440        0,
2441        0,
2442 /*55*/ 0,
2443        0,
2444        0,
2445        0,
2446        MatSetValuesBlocked_SeqBAIJ,
2447 /*60*/ MatGetSubMatrix_SeqBAIJ,
2448        MatDestroy_SeqBAIJ,
2449        MatView_SeqBAIJ,
2450        0,
2451        0,
2452 /*65*/ 0,
2453        0,
2454        0,
2455        0,
2456        0,
2457 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2458        MatConvert_Basic,
2459        0,
2460        0,
2461        0,
2462 /*75*/ 0,
2463        0,
2464        0,
2465        0,
2466        0,
2467 /*80*/ 0,
2468        0,
2469        0,
2470        0,
2471        MatLoad_SeqBAIJ,
2472 /*85*/ 0,
2473        0,
2474        0,
2475        0,
2476        0,
2477 /*90*/ 0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*95*/ 0,
2483        0,
2484        0,
2485        0,
2486        0,
2487 /*100*/0,
2488        0,
2489        0,
2490        0,
2491        0,
2492 /*105*/0,
2493        MatRealPart_SeqBAIJ,
2494        MatImaginaryPart_SeqBAIJ,
2495        0,
2496        0,
2497 /*110*/0,
2498        0,
2499        0,
2500        0,
2501        MatMissingDiagonal_SeqBAIJ
2502 /*115*/
2503 };
2504 
2505 EXTERN_C_BEGIN
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2508 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2509 {
2510   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2511   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2512   PetscErrorCode ierr;
2513 
2514   PetscFunctionBegin;
2515   if (aij->nonew != 1) {
2516     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2517   }
2518 
2519   /* allocate space for values if not already there */
2520   if (!aij->saved_values) {
2521     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2522   }
2523 
2524   /* copy values over */
2525   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2526   PetscFunctionReturn(0);
2527 }
2528 EXTERN_C_END
2529 
2530 EXTERN_C_BEGIN
2531 #undef __FUNCT__
2532 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2533 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2534 {
2535   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2536   PetscErrorCode ierr;
2537   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2538 
2539   PetscFunctionBegin;
2540   if (aij->nonew != 1) {
2541     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2542   }
2543   if (!aij->saved_values) {
2544     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2545   }
2546 
2547   /* copy values over */
2548   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 EXTERN_C_END
2552 
2553 EXTERN_C_BEGIN
2554 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2556 EXTERN_C_END
2557 
2558 EXTERN_C_BEGIN
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2561 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2562 {
2563   Mat_SeqBAIJ    *b;
2564   PetscErrorCode ierr;
2565   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2566   PetscTruth     flg,skipallocation = PETSC_FALSE;
2567 
2568   PetscFunctionBegin;
2569 
2570   if (nz == MAT_SKIP_ALLOCATION) {
2571     skipallocation = PETSC_TRUE;
2572     nz             = 0;
2573   }
2574 
2575   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2576     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2577   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2578 
2579   if (nnz && newbs != bs) {
2580     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2581   }
2582   bs   = newbs;
2583 
2584   B->rmap.bs = B->cmap.bs = bs;
2585   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2586   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2587 
2588   B->preallocated = PETSC_TRUE;
2589 
2590   mbs  = B->rmap.n/bs;
2591   nbs  = B->cmap.n/bs;
2592   bs2  = bs*bs;
2593 
2594   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2595     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2596   }
2597 
2598   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2599   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2600   if (nnz) {
2601     for (i=0; i<mbs; i++) {
2602       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2603       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2604     }
2605   }
2606 
2607   b       = (Mat_SeqBAIJ*)B->data;
2608   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2609     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2610   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2611 
2612   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2613   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2614   if (!flg) {
2615     switch (bs) {
2616     case 1:
2617       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2618       B->ops->mult            = MatMult_SeqBAIJ_1;
2619       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2620       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2621       break;
2622     case 2:
2623       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2624       B->ops->mult            = MatMult_SeqBAIJ_2;
2625       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2626       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2627       break;
2628     case 3:
2629       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2630       B->ops->mult            = MatMult_SeqBAIJ_3;
2631       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2632       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2633       break;
2634     case 4:
2635       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2636       B->ops->mult            = MatMult_SeqBAIJ_4;
2637       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2638       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2639       break;
2640     case 5:
2641       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2642       B->ops->mult            = MatMult_SeqBAIJ_5;
2643       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2644       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2645       break;
2646     case 6:
2647       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2648       B->ops->mult            = MatMult_SeqBAIJ_6;
2649       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2650       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2651       break;
2652     case 7:
2653       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2654       B->ops->mult            = MatMult_SeqBAIJ_7;
2655       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2656       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2657       break;
2658     default:
2659       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2660       B->ops->mult            = MatMult_SeqBAIJ_N;
2661       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2662       break;
2663     }
2664   }
2665   B->rmap.bs      = bs;
2666   b->mbs     = mbs;
2667   b->nbs     = nbs;
2668   if (!skipallocation) {
2669     if (!b->imax) {
2670       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2671     }
2672     /* b->ilen will count nonzeros in each block row so far. */
2673     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2674     if (!nnz) {
2675       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2676       else if (nz <= 0)        nz = 1;
2677       for (i=0; i<mbs; i++) b->imax[i] = nz;
2678       nz = nz*mbs;
2679     } else {
2680       nz = 0;
2681       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2682     }
2683 
2684     /* allocate the matrix space */
2685     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2686     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2687     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2688     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2689     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2690     b->singlemalloc = PETSC_TRUE;
2691     b->i[0] = 0;
2692     for (i=1; i<mbs+1; i++) {
2693       b->i[i] = b->i[i-1] + b->imax[i-1];
2694     }
2695     b->free_a     = PETSC_TRUE;
2696     b->free_ij    = PETSC_TRUE;
2697   } else {
2698     b->free_a     = PETSC_FALSE;
2699     b->free_ij    = PETSC_FALSE;
2700   }
2701 
2702   B->rmap.bs          = bs;
2703   b->bs2              = bs2;
2704   b->mbs              = mbs;
2705   b->nz               = 0;
2706   b->maxnz            = nz*bs2;
2707   B->info.nz_unneeded = (PetscReal)b->maxnz;
2708   PetscFunctionReturn(0);
2709 }
2710 EXTERN_C_END
2711 
2712 EXTERN_C_BEGIN
2713 #undef __FUNCT__
2714 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2715 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt I[],const PetscInt J[],const PetscScalar V[])
2716 {
2717   PetscInt       i,m,nz,nz_max=0,*nnz;
2718   PetscScalar    *values=0;
2719   PetscErrorCode ierr;
2720 
2721   PetscFunctionBegin;
2722 
2723   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2724   B->rmap.bs = bs;
2725   B->cmap.bs = bs;
2726   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2727   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2728   m = B->rmap.n/bs;
2729 
2730   if (I[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "I[0] must be 0 but it is %D",I[0]); }
2731   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2732   for(i=0; i<m; i++) {
2733     nz = I[i+1]- I[i];
2734     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2735     nz_max = PetscMax(nz_max, nz);
2736     nnz[i] = nz;
2737   }
2738   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2739   ierr = PetscFree(nnz);CHKERRQ(ierr);
2740 
2741   values = (PetscScalar*)V;
2742   if (!values) {
2743     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2744     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2745   }
2746   for (i=0; i<m; i++) {
2747     PetscInt          ncols  = I[i+1] - I[i];
2748     const PetscInt    *icols = J + I[i];
2749     const PetscScalar *svals = values + (V ? (bs*bs*I[i]) : 0);
2750     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2751   }
2752   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2753   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2754   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755 
2756   PetscFunctionReturn(0);
2757 }
2758 EXTERN_C_END
2759 
2760 
2761 EXTERN_C_BEGIN
2762 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2763 EXTERN_C_END
2764 
2765 /*MC
2766    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2767    block sparse compressed row format.
2768 
2769    Options Database Keys:
2770 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2771 
2772   Level: beginner
2773 
2774 .seealso: MatCreateSeqBAIJ()
2775 M*/
2776 
2777 
2778 EXTERN_C_BEGIN
2779 #undef __FUNCT__
2780 #define __FUNCT__ "MatCreate_SeqBAIJ"
2781 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2782 {
2783   PetscErrorCode ierr;
2784   PetscMPIInt    size;
2785   Mat_SeqBAIJ    *b;
2786 
2787   PetscFunctionBegin;
2788   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2789   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2790 
2791   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2792   B->data = (void*)b;
2793   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2794   B->mapping          = 0;
2795   b->row              = 0;
2796   b->col              = 0;
2797   b->icol             = 0;
2798   b->reallocs         = 0;
2799   b->saved_values     = 0;
2800 
2801   b->roworiented      = PETSC_TRUE;
2802   b->nonew            = 0;
2803   b->diag             = 0;
2804   b->solve_work       = 0;
2805   b->mult_work        = 0;
2806   B->spptr            = 0;
2807   B->info.nz_unneeded = (PetscReal)b->maxnz;
2808   b->keepzeroedrows   = PETSC_FALSE;
2809   b->xtoy              = 0;
2810   b->XtoY              = 0;
2811   b->compressedrow.use     = PETSC_FALSE;
2812   b->compressedrow.nrows   = 0;
2813   b->compressedrow.i       = PETSC_NULL;
2814   b->compressedrow.rindex  = PETSC_NULL;
2815   b->compressedrow.checked = PETSC_FALSE;
2816   B->same_nonzero          = PETSC_FALSE;
2817 
2818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2819                                      "MatGetFactor_seqbaij_petsc",
2820                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2822                                      "MatInvertBlockDiagonal_SeqBAIJ",
2823                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2825                                      "MatStoreValues_SeqBAIJ",
2826                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2828                                      "MatRetrieveValues_SeqBAIJ",
2829                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2831                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2832                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2834                                      "MatConvert_SeqBAIJ_SeqAIJ",
2835                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2837                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2838                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2839   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2840                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2841                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2843                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2844                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2845   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2846   PetscFunctionReturn(0);
2847 }
2848 EXTERN_C_END
2849 
2850 #undef __FUNCT__
2851 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2852 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2853 {
2854   Mat            C = *B;
2855   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2856   PetscErrorCode ierr;
2857   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2858 
2859   PetscFunctionBegin;
2860   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2861 
2862   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2863   for (i=0; i<mbs; i++) {
2864     c->imax[i] = a->imax[i];
2865     c->ilen[i] = a->ilen[i];
2866   }
2867 
2868   /* allocate the matrix space */
2869   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2870   c->singlemalloc = PETSC_TRUE;
2871   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2872   if (mbs > 0) {
2873     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2874     if (cpvalues == MAT_COPY_VALUES) {
2875       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2876     } else {
2877       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2878     }
2879   }
2880   c->roworiented = a->roworiented;
2881   c->nonew       = a->nonew;
2882   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
2883   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
2884   c->bs2         = a->bs2;
2885   c->mbs         = a->mbs;
2886   c->nbs         = a->nbs;
2887 
2888   if (a->diag) {
2889     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2890     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2891     for (i=0; i<mbs; i++) {
2892       c->diag[i] = a->diag[i];
2893     }
2894   } else c->diag        = 0;
2895   c->nz                 = a->nz;
2896   c->maxnz              = a->maxnz;
2897   c->solve_work         = 0;
2898   c->mult_work          = 0;
2899   c->free_a             = PETSC_TRUE;
2900   c->free_ij            = PETSC_TRUE;
2901   C->preallocated       = PETSC_TRUE;
2902   C->assembled          = PETSC_TRUE;
2903 
2904   c->compressedrow.use     = a->compressedrow.use;
2905   c->compressedrow.nrows   = a->compressedrow.nrows;
2906   c->compressedrow.checked = a->compressedrow.checked;
2907   if ( a->compressedrow.checked && a->compressedrow.use){
2908     i = a->compressedrow.nrows;
2909     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2910     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2911     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2912     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2913   } else {
2914     c->compressedrow.use    = PETSC_FALSE;
2915     c->compressedrow.i      = PETSC_NULL;
2916     c->compressedrow.rindex = PETSC_NULL;
2917   }
2918   C->same_nonzero = A->same_nonzero;
2919   *B = C;
2920   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2921   PetscFunctionReturn(0);
2922 }
2923 
2924 #undef __FUNCT__
2925 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2926 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2927 {
2928     PetscErrorCode ierr;
2929 
2930   PetscFunctionBegin;
2931   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2932   ierr = MatSetSizes(*B,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2933   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2934   ierr = MatDuplicateNoCreate_SeqBAIJ(A,cpvalues,B);
2935   PetscFunctionReturn(0);
2936 }
2937 
2938 #undef __FUNCT__
2939 #define __FUNCT__ "MatLoad_SeqBAIJ"
2940 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2941 {
2942   Mat_SeqBAIJ    *a;
2943   Mat            B;
2944   PetscErrorCode ierr;
2945   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2946   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2947   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2948   PetscInt       *masked,nmask,tmp,bs2,ishift;
2949   PetscMPIInt    size;
2950   int            fd;
2951   PetscScalar    *aa;
2952   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2953 
2954   PetscFunctionBegin;
2955   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2956     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2957   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2958   bs2  = bs*bs;
2959 
2960   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2961   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2962   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2963   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2964   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2965   M = header[1]; N = header[2]; nz = header[3];
2966 
2967   if (header[3] < 0) {
2968     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2969   }
2970 
2971   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2972 
2973   /*
2974      This code adds extra rows to make sure the number of rows is
2975     divisible by the blocksize
2976   */
2977   mbs        = M/bs;
2978   extra_rows = bs - M + bs*(mbs);
2979   if (extra_rows == bs) extra_rows = 0;
2980   else                  mbs++;
2981   if (extra_rows) {
2982     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2983   }
2984 
2985   /* read in row lengths */
2986   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2987   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2988   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2989 
2990   /* read in column indices */
2991   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2992   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2993   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2994 
2995   /* loop over row lengths determining block row lengths */
2996   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2997   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2998   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2999   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3000   masked   = mask + mbs;
3001   rowcount = 0; nzcount = 0;
3002   for (i=0; i<mbs; i++) {
3003     nmask = 0;
3004     for (j=0; j<bs; j++) {
3005       kmax = rowlengths[rowcount];
3006       for (k=0; k<kmax; k++) {
3007         tmp = jj[nzcount++]/bs;
3008         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3009       }
3010       rowcount++;
3011     }
3012     browlengths[i] += nmask;
3013     /* zero out the mask elements we set */
3014     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3015   }
3016 
3017   /* create our matrix */
3018   ierr = MatCreate(comm,&B);
3019   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3020   ierr = MatSetType(B,type);CHKERRQ(ierr);
3021   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3022   a = (Mat_SeqBAIJ*)B->data;
3023 
3024   /* set matrix "i" values */
3025   a->i[0] = 0;
3026   for (i=1; i<= mbs; i++) {
3027     a->i[i]      = a->i[i-1] + browlengths[i-1];
3028     a->ilen[i-1] = browlengths[i-1];
3029   }
3030   a->nz         = 0;
3031   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3032 
3033   /* read in nonzero values */
3034   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3035   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3036   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3037 
3038   /* set "a" and "j" values into matrix */
3039   nzcount = 0; jcount = 0;
3040   for (i=0; i<mbs; i++) {
3041     nzcountb = nzcount;
3042     nmask    = 0;
3043     for (j=0; j<bs; j++) {
3044       kmax = rowlengths[i*bs+j];
3045       for (k=0; k<kmax; k++) {
3046         tmp = jj[nzcount++]/bs;
3047 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3048       }
3049     }
3050     /* sort the masked values */
3051     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3052 
3053     /* set "j" values into matrix */
3054     maskcount = 1;
3055     for (j=0; j<nmask; j++) {
3056       a->j[jcount++]  = masked[j];
3057       mask[masked[j]] = maskcount++;
3058     }
3059     /* set "a" values into matrix */
3060     ishift = bs2*a->i[i];
3061     for (j=0; j<bs; j++) {
3062       kmax = rowlengths[i*bs+j];
3063       for (k=0; k<kmax; k++) {
3064         tmp       = jj[nzcountb]/bs ;
3065         block     = mask[tmp] - 1;
3066         point     = jj[nzcountb] - bs*tmp;
3067         idx       = ishift + bs2*block + j + bs*point;
3068         a->a[idx] = (MatScalar)aa[nzcountb++];
3069       }
3070     }
3071     /* zero out the mask elements we set */
3072     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3073   }
3074   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3075 
3076   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3077   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3078   ierr = PetscFree(aa);CHKERRQ(ierr);
3079   ierr = PetscFree(jj);CHKERRQ(ierr);
3080   ierr = PetscFree(mask);CHKERRQ(ierr);
3081 
3082   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3083   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3084   ierr = MatView_Private(B);CHKERRQ(ierr);
3085 
3086   *A = B;
3087   PetscFunctionReturn(0);
3088 }
3089 
3090 #undef __FUNCT__
3091 #define __FUNCT__ "MatCreateSeqBAIJ"
3092 /*@C
3093    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3094    compressed row) format.  For good matrix assembly performance the
3095    user should preallocate the matrix storage by setting the parameter nz
3096    (or the array nnz).  By setting these parameters accurately, performance
3097    during matrix assembly can be increased by more than a factor of 50.
3098 
3099    Collective on MPI_Comm
3100 
3101    Input Parameters:
3102 +  comm - MPI communicator, set to PETSC_COMM_SELF
3103 .  bs - size of block
3104 .  m - number of rows
3105 .  n - number of columns
3106 .  nz - number of nonzero blocks  per block row (same for all rows)
3107 -  nnz - array containing the number of nonzero blocks in the various block rows
3108          (possibly different for each block row) or PETSC_NULL
3109 
3110    Output Parameter:
3111 .  A - the matrix
3112 
3113    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3114    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3115    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3116    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3117 
3118    Options Database Keys:
3119 .   -mat_no_unroll - uses code that does not unroll the loops in the
3120                      block calculations (much slower)
3121 .    -mat_block_size - size of the blocks to use
3122 
3123    Level: intermediate
3124 
3125    Notes:
3126    The number of rows and columns must be divisible by blocksize.
3127 
3128    If the nnz parameter is given then the nz parameter is ignored
3129 
3130    A nonzero block is any block that as 1 or more nonzeros in it
3131 
3132    The block AIJ format is fully compatible with standard Fortran 77
3133    storage.  That is, the stored row and column indices can begin at
3134    either one (as in Fortran) or zero.  See the users' manual for details.
3135 
3136    Specify the preallocated storage with either nz or nnz (not both).
3137    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3138    allocation.  For additional details, see the users manual chapter on
3139    matrices.
3140 
3141 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3142 @*/
3143 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3144 {
3145   PetscErrorCode ierr;
3146 
3147   PetscFunctionBegin;
3148   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3149   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3150   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3151   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3152   PetscFunctionReturn(0);
3153 }
3154 
3155 #undef __FUNCT__
3156 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3157 /*@C
3158    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3159    per row in the matrix. For good matrix assembly performance the
3160    user should preallocate the matrix storage by setting the parameter nz
3161    (or the array nnz).  By setting these parameters accurately, performance
3162    during matrix assembly can be increased by more than a factor of 50.
3163 
3164    Collective on MPI_Comm
3165 
3166    Input Parameters:
3167 +  A - the matrix
3168 .  bs - size of block
3169 .  nz - number of block nonzeros per block row (same for all rows)
3170 -  nnz - array containing the number of block nonzeros in the various block rows
3171          (possibly different for each block row) or PETSC_NULL
3172 
3173    Options Database Keys:
3174 .   -mat_no_unroll - uses code that does not unroll the loops in the
3175                      block calculations (much slower)
3176 .    -mat_block_size - size of the blocks to use
3177 
3178    Level: intermediate
3179 
3180    Notes:
3181    If the nnz parameter is given then the nz parameter is ignored
3182 
3183    You can call MatGetInfo() to get information on how effective the preallocation was;
3184    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3185    You can also run with the option -info and look for messages with the string
3186    malloc in them to see if additional memory allocation was needed.
3187 
3188    The block AIJ format is fully compatible with standard Fortran 77
3189    storage.  That is, the stored row and column indices can begin at
3190    either one (as in Fortran) or zero.  See the users' manual for details.
3191 
3192    Specify the preallocated storage with either nz or nnz (not both).
3193    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3194    allocation.  For additional details, see the users manual chapter on
3195    matrices.
3196 
3197 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3198 @*/
3199 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3200 {
3201   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3202 
3203   PetscFunctionBegin;
3204   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3205   if (f) {
3206     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3207   }
3208   PetscFunctionReturn(0);
3209 }
3210 
3211 #undef __FUNCT__
3212 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3213 /*@C
3214    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3215    (the default sequential PETSc format).
3216 
3217    Collective on MPI_Comm
3218 
3219    Input Parameters:
3220 +  A - the matrix
3221 .  i - the indices into j for the start of each local row (starts with zero)
3222 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3223 -  v - optional values in the matrix
3224 
3225    Level: developer
3226 
3227 .keywords: matrix, aij, compressed row, sparse
3228 
3229 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3230 @*/
3231 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3232 {
3233   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3234 
3235   PetscFunctionBegin;
3236   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3237   if (f) {
3238     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3239   }
3240   PetscFunctionReturn(0);
3241 }
3242 
3243 
3244 #undef __FUNCT__
3245 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3246 /*@
3247      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3248               (upper triangular entries in CSR format) provided by the user.
3249 
3250      Collective on MPI_Comm
3251 
3252    Input Parameters:
3253 +  comm - must be an MPI communicator of size 1
3254 .  bs - size of block
3255 .  m - number of rows
3256 .  n - number of columns
3257 .  i - row indices
3258 .  j - column indices
3259 -  a - matrix values
3260 
3261    Output Parameter:
3262 .  mat - the matrix
3263 
3264    Level: intermediate
3265 
3266    Notes:
3267        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3268     once the matrix is destroyed
3269 
3270        You cannot set new nonzero locations into this matrix, that will generate an error.
3271 
3272        The i and j indices are 0 based
3273 
3274 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3275 
3276 @*/
3277 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3278 {
3279   PetscErrorCode ierr;
3280   PetscInt       ii;
3281   Mat_SeqBAIJ    *baij;
3282 
3283   PetscFunctionBegin;
3284   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3285   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3286 
3287   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3288   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3289   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3290   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3291   baij = (Mat_SeqBAIJ*)(*mat)->data;
3292   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3293 
3294   baij->i = i;
3295   baij->j = j;
3296   baij->a = a;
3297   baij->singlemalloc = PETSC_FALSE;
3298   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3299   baij->free_a       = PETSC_FALSE;
3300   baij->free_ij       = PETSC_FALSE;
3301 
3302   for (ii=0; ii<m; ii++) {
3303     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3304 #if defined(PETSC_USE_DEBUG)
3305     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3306 #endif
3307   }
3308 #if defined(PETSC_USE_DEBUG)
3309   for (ii=0; ii<baij->i[m]; ii++) {
3310     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3311     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3312   }
3313 #endif
3314 
3315   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3316   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317   PetscFunctionReturn(0);
3318 }
3319