xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 321429dbebc2cea9152c4f079ee3f90c879c450a)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "../src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "../src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap->bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1264   ierr = PetscFree(a);CHKERRQ(ierr);
1265 
1266   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1280 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   switch (op) {
1287   case MAT_ROW_ORIENTED:
1288     a->roworiented    = flg;
1289     break;
1290   case MAT_KEEP_ZEROED_ROWS:
1291     a->keepzeroedrows = flg;
1292     break;
1293   case MAT_NEW_NONZERO_LOCATIONS:
1294     a->nonew          = (flg ? 0 : 1);
1295     break;
1296   case MAT_NEW_NONZERO_LOCATION_ERR:
1297     a->nonew          = (flg ? -1 : 0);
1298     break;
1299   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1300     a->nonew          = (flg ? -2 : 0);
1301     break;
1302   case MAT_NEW_DIAGONALS:
1303   case MAT_IGNORE_OFF_PROC_ENTRIES:
1304   case MAT_USE_HASH_TABLE:
1305     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1306     break;
1307   case MAT_SYMMETRIC:
1308   case MAT_STRUCTURALLY_SYMMETRIC:
1309   case MAT_HERMITIAN:
1310   case MAT_SYMMETRY_ETERNAL:
1311     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1312     break;
1313   default:
1314     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1315   }
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1326   MatScalar      *aa,*aa_i;
1327   PetscScalar    *v_i;
1328 
1329   PetscFunctionBegin;
1330   bs  = A->rmap->bs;
1331   ai  = a->i;
1332   aj  = a->j;
1333   aa  = a->a;
1334   bs2 = a->bs2;
1335 
1336   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1337 
1338   bn  = row/bs;   /* Block number */
1339   bp  = row % bs; /* Block Position */
1340   M   = ai[bn+1] - ai[bn];
1341   *nz = bs*M;
1342 
1343   if (v) {
1344     *v = 0;
1345     if (*nz) {
1346       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1347       for (i=0; i<M; i++) { /* for each block in the block row */
1348         v_i  = *v + i*bs;
1349         aa_i = aa + bs2*(ai[bn] + i);
1350         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1351       }
1352     }
1353   }
1354 
1355   if (idx) {
1356     *idx = 0;
1357     if (*nz) {
1358       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1359       for (i=0; i<M; i++) { /* for each block in the block row */
1360         idx_i = *idx + i*bs;
1361         itmp  = bs*aj[ai[bn] + i];
1362         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1363       }
1364     }
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1371 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1377   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1383 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1384 {
1385   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1386   Mat            C;
1387   PetscErrorCode ierr;
1388   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1389   PetscInt       *rows,*cols,bs2=a->bs2;
1390   MatScalar      *array;
1391 
1392   PetscFunctionBegin;
1393   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1394   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1395     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1396     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1397 
1398     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1399     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1400     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1401     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1402     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1403     ierr = PetscFree(col);CHKERRQ(ierr);
1404   } else {
1405     C = *B;
1406   }
1407 
1408   array = a->a;
1409   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1410   cols = rows + bs;
1411   for (i=0; i<mbs; i++) {
1412     cols[0] = i*bs;
1413     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1414     len = ai[i+1] - ai[i];
1415     for (j=0; j<len; j++) {
1416       rows[0] = (*aj++)*bs;
1417       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1418       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1419       array += bs2;
1420     }
1421   }
1422   ierr = PetscFree(rows);CHKERRQ(ierr);
1423 
1424   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426 
1427   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1428     *B = C;
1429   } else {
1430     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1437 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1442   int            fd;
1443   PetscScalar    *aa;
1444   FILE           *file;
1445 
1446   PetscFunctionBegin;
1447   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1448   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1449   col_lens[0] = MAT_FILE_COOKIE;
1450 
1451   col_lens[1] = A->rmap->N;
1452   col_lens[2] = A->cmap->n;
1453   col_lens[3] = a->nz*bs2;
1454 
1455   /* store lengths of each row and write (including header) to file */
1456   count = 0;
1457   for (i=0; i<a->mbs; i++) {
1458     for (j=0; j<bs; j++) {
1459       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1460     }
1461   }
1462   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1463   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1464 
1465   /* store column indices (zero start index) */
1466   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1467   count = 0;
1468   for (i=0; i<a->mbs; i++) {
1469     for (j=0; j<bs; j++) {
1470       for (k=a->i[i]; k<a->i[i+1]; k++) {
1471         for (l=0; l<bs; l++) {
1472           jj[count++] = bs*a->j[k] + l;
1473         }
1474       }
1475     }
1476   }
1477   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1478   ierr = PetscFree(jj);CHKERRQ(ierr);
1479 
1480   /* store nonzero values */
1481   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1482   count = 0;
1483   for (i=0; i<a->mbs; i++) {
1484     for (j=0; j<bs; j++) {
1485       for (k=a->i[i]; k<a->i[i+1]; k++) {
1486         for (l=0; l<bs; l++) {
1487           aa[count++] = a->a[bs2*k + l*bs + j];
1488         }
1489       }
1490     }
1491   }
1492   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1493   ierr = PetscFree(aa);CHKERRQ(ierr);
1494 
1495   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1496   if (file) {
1497     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505 {
1506   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1507   PetscErrorCode    ierr;
1508   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1509   PetscViewerFormat format;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1513   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1514     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1515   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1516     Mat aij;
1517     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1518     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(aij);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1521      PetscFunctionReturn(0);
1522   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1524     for (i=0; i<a->mbs; i++) {
1525       for (j=0; j<bs; j++) {
1526         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1527         for (k=a->i[i]; k<a->i[i+1]; k++) {
1528           for (l=0; l<bs; l++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1532                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1535                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             }
1539 #else
1540             if (a->a[bs2*k + l*bs + j] != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1542             }
1543 #endif
1544           }
1545         }
1546         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1547       }
1548     }
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1550   } else {
1551     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1552     for (i=0; i<a->mbs; i++) {
1553       for (j=0; j<bs; j++) {
1554         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1555         for (k=a->i[i]; k<a->i[i+1]; k++) {
1556           for (l=0; l<bs; l++) {
1557 #if defined(PETSC_USE_COMPLEX)
1558             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1560                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1563                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             }
1567 #else
1568             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1569 #endif
1570           }
1571         }
1572         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1573       }
1574     }
1575     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1576   }
1577   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1583 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1584 {
1585   Mat            A = (Mat) Aa;
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1587   PetscErrorCode ierr;
1588   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1589   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1590   MatScalar      *aa;
1591   PetscViewer    viewer;
1592 
1593   PetscFunctionBegin;
1594 
1595   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1596   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1597 
1598   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1599 
1600   /* loop over matrix elements drawing boxes */
1601   color = PETSC_DRAW_BLUE;
1602   for (i=0,row=0; i<mbs; i++,row+=bs) {
1603     for (j=a->i[i]; j<a->i[i+1]; j++) {
1604       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1605       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606       aa = a->a + j*bs2;
1607       for (k=0; k<bs; k++) {
1608         for (l=0; l<bs; l++) {
1609           if (PetscRealPart(*aa++) >=  0.) continue;
1610           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611         }
1612       }
1613     }
1614   }
1615   color = PETSC_DRAW_CYAN;
1616   for (i=0,row=0; i<mbs; i++,row+=bs) {
1617     for (j=a->i[i]; j<a->i[i+1]; j++) {
1618       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1619       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620       aa = a->a + j*bs2;
1621       for (k=0; k<bs; k++) {
1622         for (l=0; l<bs; l++) {
1623           if (PetscRealPart(*aa++) != 0.) continue;
1624           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625         }
1626       }
1627     }
1628   }
1629 
1630   color = PETSC_DRAW_RED;
1631   for (i=0,row=0; i<mbs; i++,row+=bs) {
1632     for (j=a->i[i]; j<a->i[i+1]; j++) {
1633       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635       aa = a->a + j*bs2;
1636       for (k=0; k<bs; k++) {
1637         for (l=0; l<bs; l++) {
1638           if (PetscRealPart(*aa++) <= 0.) continue;
1639           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640         }
1641       }
1642     }
1643   }
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1649 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1650 {
1651   PetscErrorCode ierr;
1652   PetscReal      xl,yl,xr,yr,w,h;
1653   PetscDraw      draw;
1654   PetscTruth     isnull;
1655 
1656   PetscFunctionBegin;
1657 
1658   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1659   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1660 
1661   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1662   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1663   xr += w;    yr += h;  xl = -w;     yl = -h;
1664   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1665   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1666   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatView_SeqBAIJ"
1672 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1673 {
1674   PetscErrorCode ierr;
1675   PetscTruth     iascii,isbinary,isdraw;
1676 
1677   PetscFunctionBegin;
1678   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1681   if (iascii){
1682     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1683   } else if (isbinary) {
1684     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1685   } else if (isdraw) {
1686     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1687   } else {
1688     Mat B;
1689     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1690     ierr = MatView(B,viewer);CHKERRQ(ierr);
1691     ierr = MatDestroy(B);CHKERRQ(ierr);
1692   }
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1699 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1700 {
1701   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1702   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1703   PetscInt    *ai = a->i,*ailen = a->ilen;
1704   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1705   MatScalar   *ap,*aa = a->a;
1706 
1707   PetscFunctionBegin;
1708   for (k=0; k<m; k++) { /* loop over rows */
1709     row  = im[k]; brow = row/bs;
1710     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1711     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1712     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1713     nrow = ailen[brow];
1714     for (l=0; l<n; l++) { /* loop over columns */
1715       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1716       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1717       col  = in[l] ;
1718       bcol = col/bs;
1719       cidx = col%bs;
1720       ridx = row%bs;
1721       high = nrow;
1722       low  = 0; /* assume unsorted */
1723       while (high-low > 5) {
1724         t = (low+high)/2;
1725         if (rp[t] > bcol) high = t;
1726         else             low  = t;
1727       }
1728       for (i=low; i<high; i++) {
1729         if (rp[i] > bcol) break;
1730         if (rp[i] == bcol) {
1731           *v++ = ap[bs2*i+bs*cidx+ridx];
1732           goto finished;
1733         }
1734       }
1735       *v++ = 0.0;
1736       finished:;
1737     }
1738   }
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #define CHUNKSIZE 10
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1745 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1746 {
1747   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1748   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1749   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1750   PetscErrorCode    ierr;
1751   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1752   PetscTruth        roworiented=a->roworiented;
1753   const PetscScalar *value = v;
1754   MatScalar         *ap,*aa = a->a,*bap;
1755 
1756   PetscFunctionBegin;
1757   if (roworiented) {
1758     stepval = (n-1)*bs;
1759   } else {
1760     stepval = (m-1)*bs;
1761   }
1762   for (k=0; k<m; k++) { /* loop over added rows */
1763     row  = im[k];
1764     if (row < 0) continue;
1765 #if defined(PETSC_USE_DEBUG)
1766     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1767 #endif
1768     rp   = aj + ai[row];
1769     ap   = aa + bs2*ai[row];
1770     rmax = imax[row];
1771     nrow = ailen[row];
1772     low  = 0;
1773     high = nrow;
1774     for (l=0; l<n; l++) { /* loop over added columns */
1775       if (in[l] < 0) continue;
1776 #if defined(PETSC_USE_DEBUG)
1777       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1778 #endif
1779       col = in[l];
1780       if (roworiented) {
1781         value = v + k*(stepval+bs)*bs + l*bs;
1782       } else {
1783         value = v + l*(stepval+bs)*bs + k*bs;
1784       }
1785       if (col <= lastcol) low = 0; else high = nrow;
1786       lastcol = col;
1787       while (high-low > 7) {
1788         t = (low+high)/2;
1789         if (rp[t] > col) high = t;
1790         else             low  = t;
1791       }
1792       for (i=low; i<high; i++) {
1793         if (rp[i] > col) break;
1794         if (rp[i] == col) {
1795           bap  = ap +  bs2*i;
1796           if (roworiented) {
1797             if (is == ADD_VALUES) {
1798               for (ii=0; ii<bs; ii++,value+=stepval) {
1799                 for (jj=ii; jj<bs2; jj+=bs) {
1800                   bap[jj] += *value++;
1801                 }
1802               }
1803             } else {
1804               for (ii=0; ii<bs; ii++,value+=stepval) {
1805                 for (jj=ii; jj<bs2; jj+=bs) {
1806                   bap[jj] = *value++;
1807                 }
1808               }
1809             }
1810           } else {
1811             if (is == ADD_VALUES) {
1812               for (ii=0; ii<bs; ii++,value+=stepval) {
1813                 for (jj=0; jj<bs; jj++) {
1814                   *bap++ += *value++;
1815                 }
1816               }
1817             } else {
1818               for (ii=0; ii<bs; ii++,value+=stepval) {
1819                 for (jj=0; jj<bs; jj++) {
1820                   *bap++  = *value++;
1821                 }
1822               }
1823             }
1824           }
1825           goto noinsert2;
1826         }
1827       }
1828       if (nonew == 1) goto noinsert2;
1829       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1830       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1831       N = nrow++ - 1; high++;
1832       /* shift up all the later entries in this row */
1833       for (ii=N; ii>=i; ii--) {
1834         rp[ii+1] = rp[ii];
1835         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1836       }
1837       if (N >= i) {
1838         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1839       }
1840       rp[i] = col;
1841       bap   = ap +  bs2*i;
1842       if (roworiented) {
1843         for (ii=0; ii<bs; ii++,value+=stepval) {
1844           for (jj=ii; jj<bs2; jj+=bs) {
1845             bap[jj] = *value++;
1846           }
1847         }
1848       } else {
1849         for (ii=0; ii<bs; ii++,value+=stepval) {
1850           for (jj=0; jj<bs; jj++) {
1851             *bap++  = *value++;
1852           }
1853         }
1854       }
1855       noinsert2:;
1856       low = i;
1857     }
1858     ailen[row] = nrow;
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1865 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1866 {
1867   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1868   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1869   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1870   PetscErrorCode ierr;
1871   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1872   MatScalar      *aa = a->a,*ap;
1873   PetscReal      ratio=0.6;
1874 
1875   PetscFunctionBegin;
1876   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1877 
1878   if (m) rmax = ailen[0];
1879   for (i=1; i<mbs; i++) {
1880     /* move each row back by the amount of empty slots (fshift) before it*/
1881     fshift += imax[i-1] - ailen[i-1];
1882     rmax   = PetscMax(rmax,ailen[i]);
1883     if (fshift) {
1884       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1885       N = ailen[i];
1886       for (j=0; j<N; j++) {
1887         ip[j-fshift] = ip[j];
1888         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1889       }
1890     }
1891     ai[i] = ai[i-1] + ailen[i-1];
1892   }
1893   if (mbs) {
1894     fshift += imax[mbs-1] - ailen[mbs-1];
1895     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1896   }
1897   /* reset ilen and imax for each row */
1898   for (i=0; i<mbs; i++) {
1899     ailen[i] = imax[i] = ai[i+1] - ai[i];
1900   }
1901   a->nz = ai[mbs];
1902 
1903   /* diagonals may have moved, so kill the diagonal pointers */
1904   a->idiagvalid = PETSC_FALSE;
1905   if (fshift && a->diag) {
1906     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1907     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1908     a->diag = 0;
1909   }
1910   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1911   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1912   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1913   a->reallocs          = 0;
1914   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1915 
1916   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1917   if (a->compressedrow.use){
1918     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1919   }
1920 
1921   A->same_nonzero = PETSC_TRUE;
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 /*
1926    This function returns an array of flags which indicate the locations of contiguous
1927    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1928    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1929    Assume: sizes should be long enough to hold all the values.
1930 */
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1933 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1934 {
1935   PetscInt   i,j,k,row;
1936   PetscTruth flg;
1937 
1938   PetscFunctionBegin;
1939   for (i=0,j=0; i<n; j++) {
1940     row = idx[i];
1941     if (row%bs!=0) { /* Not the begining of a block */
1942       sizes[j] = 1;
1943       i++;
1944     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1945       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1946       i++;
1947     } else { /* Begining of the block, so check if the complete block exists */
1948       flg = PETSC_TRUE;
1949       for (k=1; k<bs; k++) {
1950         if (row+k != idx[i+k]) { /* break in the block */
1951           flg = PETSC_FALSE;
1952           break;
1953         }
1954       }
1955       if (flg) { /* No break in the bs */
1956         sizes[j] = bs;
1957         i+= bs;
1958       } else {
1959         sizes[j] = 1;
1960         i++;
1961       }
1962     }
1963   }
1964   *bs_max = j;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1970 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1971 {
1972   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1973   PetscErrorCode ierr;
1974   PetscInt       i,j,k,count,*rows;
1975   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1976   PetscScalar    zero = 0.0;
1977   MatScalar      *aa;
1978 
1979   PetscFunctionBegin;
1980   /* Make a copy of the IS and  sort it */
1981   /* allocate memory for rows,sizes */
1982   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1983   sizes = rows + is_n;
1984 
1985   /* copy IS values to rows, and sort them */
1986   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1987   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1988   if (baij->keepzeroedrows) {
1989     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1990     bs_max = is_n;
1991     A->same_nonzero = PETSC_TRUE;
1992   } else {
1993     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1994     A->same_nonzero = PETSC_FALSE;
1995   }
1996 
1997   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1998     row   = rows[j];
1999     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2000     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2001     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2002     if (sizes[i] == bs && !baij->keepzeroedrows) {
2003       if (diag != 0.0) {
2004         if (baij->ilen[row/bs] > 0) {
2005           baij->ilen[row/bs]       = 1;
2006           baij->j[baij->i[row/bs]] = row/bs;
2007           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2008         }
2009         /* Now insert all the diagonal values for this bs */
2010         for (k=0; k<bs; k++) {
2011           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2012         }
2013       } else { /* (diag == 0.0) */
2014         baij->ilen[row/bs] = 0;
2015       } /* end (diag == 0.0) */
2016     } else { /* (sizes[i] != bs) */
2017 #if defined (PETSC_USE_DEBUG)
2018       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2019 #endif
2020       for (k=0; k<count; k++) {
2021         aa[0] =  zero;
2022         aa    += bs;
2023       }
2024       if (diag != 0.0) {
2025         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2026       }
2027     }
2028   }
2029 
2030   ierr = PetscFree(rows);CHKERRQ(ierr);
2031   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2037 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2038 {
2039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2040   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2041   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2042   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2043   PetscErrorCode ierr;
2044   PetscInt       ridx,cidx,bs2=a->bs2;
2045   PetscTruth     roworiented=a->roworiented;
2046   MatScalar      *ap,value,*aa=a->a,*bap;
2047 
2048   PetscFunctionBegin;
2049   for (k=0; k<m; k++) { /* loop over added rows */
2050     row  = im[k];
2051     brow = row/bs;
2052     if (row < 0) continue;
2053 #if defined(PETSC_USE_DEBUG)
2054     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2055 #endif
2056     rp   = aj + ai[brow];
2057     ap   = aa + bs2*ai[brow];
2058     rmax = imax[brow];
2059     nrow = ailen[brow];
2060     low  = 0;
2061     high = nrow;
2062     for (l=0; l<n; l++) { /* loop over added columns */
2063       if (in[l] < 0) continue;
2064 #if defined(PETSC_USE_DEBUG)
2065       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2066 #endif
2067       col = in[l]; bcol = col/bs;
2068       ridx = row % bs; cidx = col % bs;
2069       if (roworiented) {
2070         value = v[l + k*n];
2071       } else {
2072         value = v[k + l*m];
2073       }
2074       if (col <= lastcol) low = 0; else high = nrow;
2075       lastcol = col;
2076       while (high-low > 7) {
2077         t = (low+high)/2;
2078         if (rp[t] > bcol) high = t;
2079         else              low  = t;
2080       }
2081       for (i=low; i<high; i++) {
2082         if (rp[i] > bcol) break;
2083         if (rp[i] == bcol) {
2084           bap  = ap +  bs2*i + bs*cidx + ridx;
2085           if (is == ADD_VALUES) *bap += value;
2086           else                  *bap  = value;
2087           goto noinsert1;
2088         }
2089       }
2090       if (nonew == 1) goto noinsert1;
2091       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2092       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2093       N = nrow++ - 1; high++;
2094       /* shift up all the later entries in this row */
2095       for (ii=N; ii>=i; ii--) {
2096         rp[ii+1] = rp[ii];
2097         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2098       }
2099       if (N>=i) {
2100         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2101       }
2102       rp[i]                      = bcol;
2103       ap[bs2*i + bs*cidx + ridx] = value;
2104       a->nz++;
2105       noinsert1:;
2106       low = i;
2107     }
2108     ailen[brow] = nrow;
2109   }
2110   A->same_nonzero = PETSC_FALSE;
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2118 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2119 {
2120   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2121   Mat            outA;
2122   PetscErrorCode ierr;
2123   PetscTruth     row_identity,col_identity;
2124 
2125   PetscFunctionBegin;
2126   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2127   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2128   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2129   if (!row_identity || !col_identity) {
2130     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2131   }
2132 
2133   outA          = inA;
2134   inA->factor   = MAT_FACTOR_LU;
2135 
2136   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2137 
2138   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2139   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2140   a->row = row;
2141   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2142   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2143   a->col = col;
2144 
2145   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2146   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2148 
2149   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2150   if (!a->solve_work) {
2151     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2152     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2153   }
2154   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2155 
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 EXTERN_C_BEGIN
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2162 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2163 {
2164   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2165   PetscInt    i,nz,nbs;
2166 
2167   PetscFunctionBegin;
2168   nz  = baij->maxnz/baij->bs2;
2169   nbs = baij->nbs;
2170   for (i=0; i<nz; i++) {
2171     baij->j[i] = indices[i];
2172   }
2173   baij->nz = nz;
2174   for (i=0; i<nbs; i++) {
2175     baij->ilen[i] = baij->imax[i];
2176   }
2177 
2178   PetscFunctionReturn(0);
2179 }
2180 EXTERN_C_END
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2184 /*@
2185     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2186        in the matrix.
2187 
2188   Input Parameters:
2189 +  mat - the SeqBAIJ matrix
2190 -  indices - the column indices
2191 
2192   Level: advanced
2193 
2194   Notes:
2195     This can be called if you have precomputed the nonzero structure of the
2196   matrix and want to provide it to the matrix object to improve the performance
2197   of the MatSetValues() operation.
2198 
2199     You MUST have set the correct numbers of nonzeros per row in the call to
2200   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2201 
2202     MUST be called before any calls to MatSetValues();
2203 
2204 @*/
2205 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2206 {
2207   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2208 
2209   PetscFunctionBegin;
2210   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2211   PetscValidPointer(indices,2);
2212   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2213   if (f) {
2214     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2215   } else {
2216     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2223 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2224 {
2225   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2226   PetscErrorCode ierr;
2227   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2228   PetscReal      atmp;
2229   PetscScalar    *x,zero = 0.0;
2230   MatScalar      *aa;
2231   PetscInt       ncols,brow,krow,kcol;
2232 
2233   PetscFunctionBegin;
2234   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2235   bs   = A->rmap->bs;
2236   aa   = a->a;
2237   ai   = a->i;
2238   aj   = a->j;
2239   mbs  = a->mbs;
2240 
2241   ierr = VecSet(v,zero);CHKERRQ(ierr);
2242   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2243   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2244   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2245   for (i=0; i<mbs; i++) {
2246     ncols = ai[1] - ai[0]; ai++;
2247     brow  = bs*i;
2248     for (j=0; j<ncols; j++){
2249       for (kcol=0; kcol<bs; kcol++){
2250         for (krow=0; krow<bs; krow++){
2251           atmp = PetscAbsScalar(*aa);aa++;
2252           row = brow + krow;    /* row index */
2253           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2254           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2255         }
2256       }
2257       aj++;
2258     }
2259   }
2260   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatCopy_SeqBAIJ"
2266 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2267 {
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   /* If the two matrices have the same copy implementation, use fast copy. */
2272   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2273     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2274     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2275 
2276     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2277       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2278     }
2279     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2280   } else {
2281     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2282   }
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2288 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2289 {
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2299 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2300 {
2301   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2302   PetscFunctionBegin;
2303   *array = a->a;
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2309 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2310 {
2311   PetscFunctionBegin;
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 #include "petscblaslapack.h"
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2318 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2319 {
2320   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2321   PetscErrorCode ierr;
2322   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2323   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2324 
2325   PetscFunctionBegin;
2326   if (str == SAME_NONZERO_PATTERN) {
2327     PetscScalar alpha = a;
2328     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2329   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2330     if (y->xtoy && y->XtoY != X) {
2331       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2332       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2333     }
2334     if (!y->xtoy) { /* get xtoy */
2335       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2336       y->XtoY = X;
2337       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2338     }
2339     bs2 = bs*bs;
2340     for (i=0; i<x->nz; i++) {
2341       j = 0;
2342       while (j < bs2){
2343         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2344         j++;
2345       }
2346     }
2347     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2348   } else {
2349     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2350   }
2351   PetscFunctionReturn(0);
2352 }
2353 
2354 #undef __FUNCT__
2355 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2356 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2357 {
2358   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2359   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2360   MatScalar      *aa = a->a;
2361 
2362   PetscFunctionBegin;
2363   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2369 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2370 {
2371   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2372   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2373   MatScalar      *aa = a->a;
2374 
2375   PetscFunctionBegin;
2376   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 
2381 /* -------------------------------------------------------------------*/
2382 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2383        MatGetRow_SeqBAIJ,
2384        MatRestoreRow_SeqBAIJ,
2385        MatMult_SeqBAIJ_N,
2386 /* 4*/ MatMultAdd_SeqBAIJ_N,
2387        MatMultTranspose_SeqBAIJ,
2388        MatMultTransposeAdd_SeqBAIJ,
2389        0,
2390        0,
2391        0,
2392 /*10*/ 0,
2393        MatLUFactor_SeqBAIJ,
2394        0,
2395        0,
2396        MatTranspose_SeqBAIJ,
2397 /*15*/ MatGetInfo_SeqBAIJ,
2398        MatEqual_SeqBAIJ,
2399        MatGetDiagonal_SeqBAIJ,
2400        MatDiagonalScale_SeqBAIJ,
2401        MatNorm_SeqBAIJ,
2402 /*20*/ 0,
2403        MatAssemblyEnd_SeqBAIJ,
2404        0,
2405        MatSetOption_SeqBAIJ,
2406        MatZeroEntries_SeqBAIJ,
2407 /*25*/ MatZeroRows_SeqBAIJ,
2408        0,
2409        0,
2410        0,
2411        0,
2412 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2413        0,
2414        0,
2415        MatGetArray_SeqBAIJ,
2416        MatRestoreArray_SeqBAIJ,
2417 /*35*/ MatDuplicate_SeqBAIJ,
2418        0,
2419        0,
2420        MatILUFactor_SeqBAIJ,
2421        0,
2422 /*40*/ MatAXPY_SeqBAIJ,
2423        MatGetSubMatrices_SeqBAIJ,
2424        MatIncreaseOverlap_SeqBAIJ,
2425        MatGetValues_SeqBAIJ,
2426        MatCopy_SeqBAIJ,
2427 /*45*/ 0,
2428        MatScale_SeqBAIJ,
2429        0,
2430        0,
2431        0,
2432 /*50*/ 0,
2433        MatGetRowIJ_SeqBAIJ,
2434        MatRestoreRowIJ_SeqBAIJ,
2435        0,
2436        0,
2437 /*55*/ 0,
2438        0,
2439        0,
2440        0,
2441        MatSetValuesBlocked_SeqBAIJ,
2442 /*60*/ MatGetSubMatrix_SeqBAIJ,
2443        MatDestroy_SeqBAIJ,
2444        MatView_SeqBAIJ,
2445        0,
2446        0,
2447 /*65*/ 0,
2448        0,
2449        0,
2450        0,
2451        0,
2452 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2453        0,
2454        MatConvert_Basic,
2455        0,
2456        0,
2457 /*75*/ 0,
2458        0,
2459        0,
2460        0,
2461        0,
2462 /*80*/ 0,
2463        0,
2464        0,
2465        0,
2466        MatLoad_SeqBAIJ,
2467 /*85*/ 0,
2468        0,
2469        0,
2470        0,
2471        0,
2472 /*90*/ 0,
2473        0,
2474        0,
2475        0,
2476        0,
2477 /*95*/ 0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*100*/0,
2483        0,
2484        0,
2485        0,
2486        0,
2487 /*105*/0,
2488        MatRealPart_SeqBAIJ,
2489        MatImaginaryPart_SeqBAIJ,
2490        0,
2491        0,
2492 /*110*/0,
2493        0,
2494        0,
2495        0,
2496        MatMissingDiagonal_SeqBAIJ
2497 /*115*/
2498 };
2499 
2500 EXTERN_C_BEGIN
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2503 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2504 {
2505   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2506   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2507   PetscErrorCode ierr;
2508 
2509   PetscFunctionBegin;
2510   if (aij->nonew != 1) {
2511     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2512   }
2513 
2514   /* allocate space for values if not already there */
2515   if (!aij->saved_values) {
2516     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2517   }
2518 
2519   /* copy values over */
2520   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2521   PetscFunctionReturn(0);
2522 }
2523 EXTERN_C_END
2524 
2525 EXTERN_C_BEGIN
2526 #undef __FUNCT__
2527 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2528 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2529 {
2530   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2531   PetscErrorCode ierr;
2532   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2533 
2534   PetscFunctionBegin;
2535   if (aij->nonew != 1) {
2536     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2537   }
2538   if (!aij->saved_values) {
2539     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2540   }
2541 
2542   /* copy values over */
2543   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2544   PetscFunctionReturn(0);
2545 }
2546 EXTERN_C_END
2547 
2548 EXTERN_C_BEGIN
2549 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2550 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2551 EXTERN_C_END
2552 
2553 EXTERN_C_BEGIN
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2556 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2557 {
2558   Mat_SeqBAIJ    *b;
2559   PetscErrorCode ierr;
2560   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2561   PetscTruth     flg,skipallocation = PETSC_FALSE;
2562 
2563   PetscFunctionBegin;
2564 
2565   if (nz == MAT_SKIP_ALLOCATION) {
2566     skipallocation = PETSC_TRUE;
2567     nz             = 0;
2568   }
2569 
2570   if (bs < 0) {
2571     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2572       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2573     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2574     bs   = PetscAbs(bs);
2575   }
2576   if (nnz && newbs != bs) {
2577     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2578   }
2579   bs   = newbs;
2580 
2581   B->rmap->bs = B->cmap->bs = bs;
2582   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2583   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2584 
2585   B->preallocated = PETSC_TRUE;
2586 
2587   mbs  = B->rmap->n/bs;
2588   nbs  = B->cmap->n/bs;
2589   bs2  = bs*bs;
2590 
2591   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2592     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2593   }
2594 
2595   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2596   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2597   if (nnz) {
2598     for (i=0; i<mbs; i++) {
2599       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2600       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2601     }
2602   }
2603 
2604   b       = (Mat_SeqBAIJ*)B->data;
2605   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2606     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2607   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2608 
2609   if (!flg) {
2610     switch (bs) {
2611     case 1:
2612       B->ops->mult            = MatMult_SeqBAIJ_1;
2613       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2614       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2615       break;
2616     case 2:
2617       B->ops->mult            = MatMult_SeqBAIJ_2;
2618       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2619       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2620       break;
2621     case 3:
2622       B->ops->mult            = MatMult_SeqBAIJ_3;
2623       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2624       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2625       break;
2626     case 4:
2627       B->ops->mult            = MatMult_SeqBAIJ_4;
2628       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2629       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2630       break;
2631     case 5:
2632       B->ops->mult            = MatMult_SeqBAIJ_5;
2633       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2634       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2635       break;
2636     case 6:
2637       B->ops->mult            = MatMult_SeqBAIJ_6;
2638       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2639       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2640       break;
2641     case 7:
2642       B->ops->mult            = MatMult_SeqBAIJ_7;
2643       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2644       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2645       break;
2646     default:
2647       B->ops->mult            = MatMult_SeqBAIJ_N;
2648       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2649       break;
2650     }
2651   }
2652   B->rmap->bs      = bs;
2653   b->mbs     = mbs;
2654   b->nbs     = nbs;
2655   if (!skipallocation) {
2656     if (!b->imax) {
2657       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2658     }
2659     /* b->ilen will count nonzeros in each block row so far. */
2660     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2661     if (!nnz) {
2662       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2663       else if (nz <= 0)        nz = 1;
2664       for (i=0; i<mbs; i++) b->imax[i] = nz;
2665       nz = nz*mbs;
2666     } else {
2667       nz = 0;
2668       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2669     }
2670 
2671     /* allocate the matrix space */
2672     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2673     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2674     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2675     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2676     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2677     b->singlemalloc = PETSC_TRUE;
2678     b->i[0] = 0;
2679     for (i=1; i<mbs+1; i++) {
2680       b->i[i] = b->i[i-1] + b->imax[i-1];
2681     }
2682     b->free_a     = PETSC_TRUE;
2683     b->free_ij    = PETSC_TRUE;
2684   } else {
2685     b->free_a     = PETSC_FALSE;
2686     b->free_ij    = PETSC_FALSE;
2687   }
2688 
2689   B->rmap->bs          = bs;
2690   b->bs2              = bs2;
2691   b->mbs              = mbs;
2692   b->nz               = 0;
2693   b->maxnz            = nz*bs2;
2694   B->info.nz_unneeded = (PetscReal)b->maxnz;
2695   PetscFunctionReturn(0);
2696 }
2697 EXTERN_C_END
2698 
2699 EXTERN_C_BEGIN
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2702 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2703 {
2704   PetscInt       i,m,nz,nz_max=0,*nnz;
2705   PetscScalar    *values=0;
2706   PetscErrorCode ierr;
2707 
2708   PetscFunctionBegin;
2709 
2710   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2711   B->rmap->bs = bs;
2712   B->cmap->bs = bs;
2713   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2714   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2715   m = B->rmap->n/bs;
2716 
2717   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2718   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2719   for(i=0; i<m; i++) {
2720     nz = ii[i+1]- ii[i];
2721     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2722     nz_max = PetscMax(nz_max, nz);
2723     nnz[i] = nz;
2724   }
2725   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2726   ierr = PetscFree(nnz);CHKERRQ(ierr);
2727 
2728   values = (PetscScalar*)V;
2729   if (!values) {
2730     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2731     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2732   }
2733   for (i=0; i<m; i++) {
2734     PetscInt          ncols  = ii[i+1] - ii[i];
2735     const PetscInt    *icols = jj + ii[i];
2736     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2737     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2738   }
2739   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2740   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2742 
2743   PetscFunctionReturn(0);
2744 }
2745 EXTERN_C_END
2746 
2747 
2748 EXTERN_C_BEGIN
2749 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2750 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2751 EXTERN_C_END
2752 
2753 /*MC
2754    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2755    block sparse compressed row format.
2756 
2757    Options Database Keys:
2758 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2759 
2760   Level: beginner
2761 
2762 .seealso: MatCreateSeqBAIJ()
2763 M*/
2764 
2765 
2766 EXTERN_C_BEGIN
2767 #undef __FUNCT__
2768 #define __FUNCT__ "MatCreate_SeqBAIJ"
2769 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2770 {
2771   PetscErrorCode ierr;
2772   PetscMPIInt    size;
2773   Mat_SeqBAIJ    *b;
2774 
2775   PetscFunctionBegin;
2776   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2777   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2778 
2779   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2780   B->data = (void*)b;
2781   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2782   B->mapping          = 0;
2783   b->row              = 0;
2784   b->col              = 0;
2785   b->icol             = 0;
2786   b->reallocs         = 0;
2787   b->saved_values     = 0;
2788 
2789   b->roworiented      = PETSC_TRUE;
2790   b->nonew            = 0;
2791   b->diag             = 0;
2792   b->solve_work       = 0;
2793   b->mult_work        = 0;
2794   B->spptr            = 0;
2795   B->info.nz_unneeded = (PetscReal)b->maxnz;
2796   b->keepzeroedrows   = PETSC_FALSE;
2797   b->xtoy              = 0;
2798   b->XtoY              = 0;
2799   b->compressedrow.use     = PETSC_FALSE;
2800   b->compressedrow.nrows   = 0;
2801   b->compressedrow.i       = PETSC_NULL;
2802   b->compressedrow.rindex  = PETSC_NULL;
2803   b->compressedrow.checked = PETSC_FALSE;
2804   B->same_nonzero          = PETSC_FALSE;
2805 
2806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2807                                      "MatGetFactorAvailable_seqbaij_petsc",
2808                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2810                                      "MatGetFactor_seqbaij_petsc",
2811                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2813                                      "MatInvertBlockDiagonal_SeqBAIJ",
2814                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2815   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2816                                      "MatStoreValues_SeqBAIJ",
2817                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2819                                      "MatRetrieveValues_SeqBAIJ",
2820                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2822                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2823                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2825                                      "MatConvert_SeqBAIJ_SeqAIJ",
2826                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2828                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2829                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2831                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2832                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2834                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2835                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2836   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 EXTERN_C_END
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2843 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2844 {
2845   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2846   PetscErrorCode ierr;
2847   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2848 
2849   PetscFunctionBegin;
2850   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2851 
2852   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2853   for (i=0; i<mbs; i++) {
2854     c->imax[i] = a->imax[i];
2855     c->ilen[i] = a->ilen[i];
2856   }
2857 
2858   /* allocate the matrix space */
2859   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2860   c->singlemalloc = PETSC_TRUE;
2861   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2862   if (mbs > 0) {
2863     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2864     if (cpvalues == MAT_COPY_VALUES) {
2865       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2866     } else {
2867       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2868     }
2869   }
2870   c->roworiented = a->roworiented;
2871   c->nonew       = a->nonew;
2872   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2873   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2874   c->bs2         = a->bs2;
2875   c->mbs         = a->mbs;
2876   c->nbs         = a->nbs;
2877 
2878   if (a->diag) {
2879     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2880     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2881     for (i=0; i<mbs; i++) {
2882       c->diag[i] = a->diag[i];
2883     }
2884   } else c->diag        = 0;
2885   c->nz                 = a->nz;
2886   c->maxnz              = a->maxnz;
2887   c->solve_work         = 0;
2888   c->mult_work          = 0;
2889   c->free_a             = PETSC_TRUE;
2890   c->free_ij            = PETSC_TRUE;
2891   C->preallocated       = PETSC_TRUE;
2892   C->assembled          = PETSC_TRUE;
2893 
2894   c->compressedrow.use     = a->compressedrow.use;
2895   c->compressedrow.nrows   = a->compressedrow.nrows;
2896   c->compressedrow.checked = a->compressedrow.checked;
2897   if ( a->compressedrow.checked && a->compressedrow.use){
2898     i = a->compressedrow.nrows;
2899     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2900     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2901     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2902     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2903   } else {
2904     c->compressedrow.use    = PETSC_FALSE;
2905     c->compressedrow.i      = PETSC_NULL;
2906     c->compressedrow.rindex = PETSC_NULL;
2907   }
2908   C->same_nonzero = A->same_nonzero;
2909   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2910   PetscFunctionReturn(0);
2911 }
2912 
2913 #undef __FUNCT__
2914 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2915 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2916 {
2917     PetscErrorCode ierr;
2918 
2919   PetscFunctionBegin;
2920   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2921   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2922   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2923   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2924   PetscFunctionReturn(0);
2925 }
2926 
2927 #undef __FUNCT__
2928 #define __FUNCT__ "MatLoad_SeqBAIJ"
2929 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2930 {
2931   Mat_SeqBAIJ    *a;
2932   Mat            B;
2933   PetscErrorCode ierr;
2934   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2935   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2936   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2937   PetscInt       *masked,nmask,tmp,bs2,ishift;
2938   PetscMPIInt    size;
2939   int            fd;
2940   PetscScalar    *aa;
2941   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2942 
2943   PetscFunctionBegin;
2944   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2945     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2946   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2947   bs2  = bs*bs;
2948 
2949   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2950   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2951   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2952   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2953   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2954   M = header[1]; N = header[2]; nz = header[3];
2955 
2956   if (header[3] < 0) {
2957     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2958   }
2959 
2960   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2961 
2962   /*
2963      This code adds extra rows to make sure the number of rows is
2964     divisible by the blocksize
2965   */
2966   mbs        = M/bs;
2967   extra_rows = bs - M + bs*(mbs);
2968   if (extra_rows == bs) extra_rows = 0;
2969   else                  mbs++;
2970   if (extra_rows) {
2971     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2972   }
2973 
2974   /* read in row lengths */
2975   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2976   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2977   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2978 
2979   /* read in column indices */
2980   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2981   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2982   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2983 
2984   /* loop over row lengths determining block row lengths */
2985   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2986   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2987   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2988   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2989   masked   = mask + mbs;
2990   rowcount = 0; nzcount = 0;
2991   for (i=0; i<mbs; i++) {
2992     nmask = 0;
2993     for (j=0; j<bs; j++) {
2994       kmax = rowlengths[rowcount];
2995       for (k=0; k<kmax; k++) {
2996         tmp = jj[nzcount++]/bs;
2997         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2998       }
2999       rowcount++;
3000     }
3001     browlengths[i] += nmask;
3002     /* zero out the mask elements we set */
3003     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3004   }
3005 
3006   /* create our matrix */
3007   ierr = MatCreate(comm,&B);
3008   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3009   ierr = MatSetType(B,type);CHKERRQ(ierr);
3010   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3011   a = (Mat_SeqBAIJ*)B->data;
3012 
3013   /* set matrix "i" values */
3014   a->i[0] = 0;
3015   for (i=1; i<= mbs; i++) {
3016     a->i[i]      = a->i[i-1] + browlengths[i-1];
3017     a->ilen[i-1] = browlengths[i-1];
3018   }
3019   a->nz         = 0;
3020   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3021 
3022   /* read in nonzero values */
3023   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3024   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3025   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3026 
3027   /* set "a" and "j" values into matrix */
3028   nzcount = 0; jcount = 0;
3029   for (i=0; i<mbs; i++) {
3030     nzcountb = nzcount;
3031     nmask    = 0;
3032     for (j=0; j<bs; j++) {
3033       kmax = rowlengths[i*bs+j];
3034       for (k=0; k<kmax; k++) {
3035         tmp = jj[nzcount++]/bs;
3036 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3037       }
3038     }
3039     /* sort the masked values */
3040     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3041 
3042     /* set "j" values into matrix */
3043     maskcount = 1;
3044     for (j=0; j<nmask; j++) {
3045       a->j[jcount++]  = masked[j];
3046       mask[masked[j]] = maskcount++;
3047     }
3048     /* set "a" values into matrix */
3049     ishift = bs2*a->i[i];
3050     for (j=0; j<bs; j++) {
3051       kmax = rowlengths[i*bs+j];
3052       for (k=0; k<kmax; k++) {
3053         tmp       = jj[nzcountb]/bs ;
3054         block     = mask[tmp] - 1;
3055         point     = jj[nzcountb] - bs*tmp;
3056         idx       = ishift + bs2*block + j + bs*point;
3057         a->a[idx] = (MatScalar)aa[nzcountb++];
3058       }
3059     }
3060     /* zero out the mask elements we set */
3061     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3062   }
3063   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3064 
3065   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3066   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3067   ierr = PetscFree(aa);CHKERRQ(ierr);
3068   ierr = PetscFree(jj);CHKERRQ(ierr);
3069   ierr = PetscFree(mask);CHKERRQ(ierr);
3070 
3071   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3072   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = MatView_Private(B);CHKERRQ(ierr);
3074 
3075   *A = B;
3076   PetscFunctionReturn(0);
3077 }
3078 
3079 #undef __FUNCT__
3080 #define __FUNCT__ "MatCreateSeqBAIJ"
3081 /*@C
3082    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3083    compressed row) format.  For good matrix assembly performance the
3084    user should preallocate the matrix storage by setting the parameter nz
3085    (or the array nnz).  By setting these parameters accurately, performance
3086    during matrix assembly can be increased by more than a factor of 50.
3087 
3088    Collective on MPI_Comm
3089 
3090    Input Parameters:
3091 +  comm - MPI communicator, set to PETSC_COMM_SELF
3092 .  bs - size of block
3093 .  m - number of rows
3094 .  n - number of columns
3095 .  nz - number of nonzero blocks  per block row (same for all rows)
3096 -  nnz - array containing the number of nonzero blocks in the various block rows
3097          (possibly different for each block row) or PETSC_NULL
3098 
3099    Output Parameter:
3100 .  A - the matrix
3101 
3102    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3103    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3104    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3105    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3106 
3107    Options Database Keys:
3108 .   -mat_no_unroll - uses code that does not unroll the loops in the
3109                      block calculations (much slower)
3110 .    -mat_block_size - size of the blocks to use
3111 
3112    Level: intermediate
3113 
3114    Notes:
3115    The number of rows and columns must be divisible by blocksize.
3116 
3117    If the nnz parameter is given then the nz parameter is ignored
3118 
3119    A nonzero block is any block that as 1 or more nonzeros in it
3120 
3121    The block AIJ format is fully compatible with standard Fortran 77
3122    storage.  That is, the stored row and column indices can begin at
3123    either one (as in Fortran) or zero.  See the users' manual for details.
3124 
3125    Specify the preallocated storage with either nz or nnz (not both).
3126    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3127    allocation.  For additional details, see the users manual chapter on
3128    matrices.
3129 
3130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3131 @*/
3132 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3133 {
3134   PetscErrorCode ierr;
3135 
3136   PetscFunctionBegin;
3137   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3138   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3139   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3140   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
3143 
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3146 /*@C
3147    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3148    per row in the matrix. For good matrix assembly performance the
3149    user should preallocate the matrix storage by setting the parameter nz
3150    (or the array nnz).  By setting these parameters accurately, performance
3151    during matrix assembly can be increased by more than a factor of 50.
3152 
3153    Collective on MPI_Comm
3154 
3155    Input Parameters:
3156 +  A - the matrix
3157 .  bs - size of block
3158 .  nz - number of block nonzeros per block row (same for all rows)
3159 -  nnz - array containing the number of block nonzeros in the various block rows
3160          (possibly different for each block row) or PETSC_NULL
3161 
3162    Options Database Keys:
3163 .   -mat_no_unroll - uses code that does not unroll the loops in the
3164                      block calculations (much slower)
3165 .    -mat_block_size - size of the blocks to use
3166 
3167    Level: intermediate
3168 
3169    Notes:
3170    If the nnz parameter is given then the nz parameter is ignored
3171 
3172    You can call MatGetInfo() to get information on how effective the preallocation was;
3173    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3174    You can also run with the option -info and look for messages with the string
3175    malloc in them to see if additional memory allocation was needed.
3176 
3177    The block AIJ format is fully compatible with standard Fortran 77
3178    storage.  That is, the stored row and column indices can begin at
3179    either one (as in Fortran) or zero.  See the users' manual for details.
3180 
3181    Specify the preallocated storage with either nz or nnz (not both).
3182    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3183    allocation.  For additional details, see the users manual chapter on
3184    matrices.
3185 
3186 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3187 @*/
3188 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3189 {
3190   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3191 
3192   PetscFunctionBegin;
3193   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3194   if (f) {
3195     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3196   }
3197   PetscFunctionReturn(0);
3198 }
3199 
3200 #undef __FUNCT__
3201 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3202 /*@C
3203    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3204    (the default sequential PETSc format).
3205 
3206    Collective on MPI_Comm
3207 
3208    Input Parameters:
3209 +  A - the matrix
3210 .  i - the indices into j for the start of each local row (starts with zero)
3211 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3212 -  v - optional values in the matrix
3213 
3214    Level: developer
3215 
3216 .keywords: matrix, aij, compressed row, sparse
3217 
3218 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3219 @*/
3220 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3221 {
3222   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3223 
3224   PetscFunctionBegin;
3225   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3226   if (f) {
3227     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3228   }
3229   PetscFunctionReturn(0);
3230 }
3231 
3232 
3233 #undef __FUNCT__
3234 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3235 /*@
3236      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3237               (upper triangular entries in CSR format) provided by the user.
3238 
3239      Collective on MPI_Comm
3240 
3241    Input Parameters:
3242 +  comm - must be an MPI communicator of size 1
3243 .  bs - size of block
3244 .  m - number of rows
3245 .  n - number of columns
3246 .  i - row indices
3247 .  j - column indices
3248 -  a - matrix values
3249 
3250    Output Parameter:
3251 .  mat - the matrix
3252 
3253    Level: intermediate
3254 
3255    Notes:
3256        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3257     once the matrix is destroyed
3258 
3259        You cannot set new nonzero locations into this matrix, that will generate an error.
3260 
3261        The i and j indices are 0 based
3262 
3263 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3264 
3265 @*/
3266 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3267 {
3268   PetscErrorCode ierr;
3269   PetscInt       ii;
3270   Mat_SeqBAIJ    *baij;
3271 
3272   PetscFunctionBegin;
3273   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3274   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3275 
3276   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3277   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3278   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3279   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3280   baij = (Mat_SeqBAIJ*)(*mat)->data;
3281   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3282 
3283   baij->i = i;
3284   baij->j = j;
3285   baij->a = a;
3286   baij->singlemalloc = PETSC_FALSE;
3287   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3288   baij->free_a       = PETSC_FALSE;
3289   baij->free_ij       = PETSC_FALSE;
3290 
3291   for (ii=0; ii<m; ii++) {
3292     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3293 #if defined(PETSC_USE_DEBUG)
3294     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3295 #endif
3296   }
3297 #if defined(PETSC_USE_DEBUG)
3298   for (ii=0; ii<baij->i[m]; ii++) {
3299     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3300     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3301   }
3302 #endif
3303 
3304   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3305   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3306   PetscFunctionReturn(0);
3307 }
3308