xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a4efd8ea4bb0600a5aef92b2c23cbfa314827193)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap->bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1264   ierr = PetscFree(a);CHKERRQ(ierr);
1265 
1266   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1280 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   switch (op) {
1287   case MAT_ROW_ORIENTED:
1288     a->roworiented    = flg;
1289     break;
1290   case MAT_KEEP_ZEROED_ROWS:
1291     a->keepzeroedrows = flg;
1292     break;
1293   case MAT_NEW_NONZERO_LOCATIONS:
1294     a->nonew          = (flg ? 0 : 1);
1295     break;
1296   case MAT_NEW_NONZERO_LOCATION_ERR:
1297     a->nonew          = (flg ? -1 : 0);
1298     break;
1299   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1300     a->nonew          = (flg ? -2 : 0);
1301     break;
1302   case MAT_NEW_DIAGONALS:
1303   case MAT_IGNORE_OFF_PROC_ENTRIES:
1304   case MAT_USE_HASH_TABLE:
1305     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1306     break;
1307   case MAT_SYMMETRIC:
1308   case MAT_STRUCTURALLY_SYMMETRIC:
1309   case MAT_HERMITIAN:
1310   case MAT_SYMMETRY_ETERNAL:
1311     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1312     break;
1313   default:
1314     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1315   }
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1326   MatScalar      *aa,*aa_i;
1327   PetscScalar    *v_i;
1328 
1329   PetscFunctionBegin;
1330   bs  = A->rmap->bs;
1331   ai  = a->i;
1332   aj  = a->j;
1333   aa  = a->a;
1334   bs2 = a->bs2;
1335 
1336   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1337 
1338   bn  = row/bs;   /* Block number */
1339   bp  = row % bs; /* Block Position */
1340   M   = ai[bn+1] - ai[bn];
1341   *nz = bs*M;
1342 
1343   if (v) {
1344     *v = 0;
1345     if (*nz) {
1346       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1347       for (i=0; i<M; i++) { /* for each block in the block row */
1348         v_i  = *v + i*bs;
1349         aa_i = aa + bs2*(ai[bn] + i);
1350         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1351       }
1352     }
1353   }
1354 
1355   if (idx) {
1356     *idx = 0;
1357     if (*nz) {
1358       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1359       for (i=0; i<M; i++) { /* for each block in the block row */
1360         idx_i = *idx + i*bs;
1361         itmp  = bs*aj[ai[bn] + i];
1362         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1363       }
1364     }
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1371 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1377   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1383 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1384 {
1385   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1386   Mat            C;
1387   PetscErrorCode ierr;
1388   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1389   PetscInt       *rows,*cols,bs2=a->bs2;
1390   MatScalar      *array;
1391 
1392   PetscFunctionBegin;
1393   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1394   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1395     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1396     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1397 
1398     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1399     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1400     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1401     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1402     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1403     ierr = PetscFree(col);CHKERRQ(ierr);
1404   } else {
1405     C = *B;
1406   }
1407 
1408   array = a->a;
1409   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1410   cols = rows + bs;
1411   for (i=0; i<mbs; i++) {
1412     cols[0] = i*bs;
1413     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1414     len = ai[i+1] - ai[i];
1415     for (j=0; j<len; j++) {
1416       rows[0] = (*aj++)*bs;
1417       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1418       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1419       array += bs2;
1420     }
1421   }
1422   ierr = PetscFree(rows);CHKERRQ(ierr);
1423 
1424   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426 
1427   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1428     *B = C;
1429   } else {
1430     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1437 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1442   int            fd;
1443   PetscScalar    *aa;
1444   FILE           *file;
1445 
1446   PetscFunctionBegin;
1447   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1448   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1449   col_lens[0] = MAT_FILE_COOKIE;
1450 
1451   col_lens[1] = A->rmap->N;
1452   col_lens[2] = A->cmap->n;
1453   col_lens[3] = a->nz*bs2;
1454 
1455   /* store lengths of each row and write (including header) to file */
1456   count = 0;
1457   for (i=0; i<a->mbs; i++) {
1458     for (j=0; j<bs; j++) {
1459       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1460     }
1461   }
1462   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1463   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1464 
1465   /* store column indices (zero start index) */
1466   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1467   count = 0;
1468   for (i=0; i<a->mbs; i++) {
1469     for (j=0; j<bs; j++) {
1470       for (k=a->i[i]; k<a->i[i+1]; k++) {
1471         for (l=0; l<bs; l++) {
1472           jj[count++] = bs*a->j[k] + l;
1473         }
1474       }
1475     }
1476   }
1477   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1478   ierr = PetscFree(jj);CHKERRQ(ierr);
1479 
1480   /* store nonzero values */
1481   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1482   count = 0;
1483   for (i=0; i<a->mbs; i++) {
1484     for (j=0; j<bs; j++) {
1485       for (k=a->i[i]; k<a->i[i+1]; k++) {
1486         for (l=0; l<bs; l++) {
1487           aa[count++] = a->a[bs2*k + l*bs + j];
1488         }
1489       }
1490     }
1491   }
1492   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1493   ierr = PetscFree(aa);CHKERRQ(ierr);
1494 
1495   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1496   if (file) {
1497     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505 {
1506   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1507   PetscErrorCode    ierr;
1508   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1509   PetscViewerFormat format;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1513   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1514     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1515   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1516     Mat aij;
1517     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1518     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(aij);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1521      PetscFunctionReturn(0);
1522   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1524     for (i=0; i<a->mbs; i++) {
1525       for (j=0; j<bs; j++) {
1526         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1527         for (k=a->i[i]; k<a->i[i+1]; k++) {
1528           for (l=0; l<bs; l++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1532                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1535                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             }
1539 #else
1540             if (a->a[bs2*k + l*bs + j] != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1542             }
1543 #endif
1544           }
1545         }
1546         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1547       }
1548     }
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1550   } else {
1551     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1552     for (i=0; i<a->mbs; i++) {
1553       for (j=0; j<bs; j++) {
1554         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1555         for (k=a->i[i]; k<a->i[i+1]; k++) {
1556           for (l=0; l<bs; l++) {
1557 #if defined(PETSC_USE_COMPLEX)
1558             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1560                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1563                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             }
1567 #else
1568             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1569 #endif
1570           }
1571         }
1572         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1573       }
1574     }
1575     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1576   }
1577   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1583 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1584 {
1585   Mat            A = (Mat) Aa;
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1587   PetscErrorCode ierr;
1588   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1589   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1590   MatScalar      *aa;
1591   PetscViewer    viewer;
1592 
1593   PetscFunctionBegin;
1594 
1595   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1596   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1597 
1598   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1599 
1600   /* loop over matrix elements drawing boxes */
1601   color = PETSC_DRAW_BLUE;
1602   for (i=0,row=0; i<mbs; i++,row+=bs) {
1603     for (j=a->i[i]; j<a->i[i+1]; j++) {
1604       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1605       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606       aa = a->a + j*bs2;
1607       for (k=0; k<bs; k++) {
1608         for (l=0; l<bs; l++) {
1609           if (PetscRealPart(*aa++) >=  0.) continue;
1610           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611         }
1612       }
1613     }
1614   }
1615   color = PETSC_DRAW_CYAN;
1616   for (i=0,row=0; i<mbs; i++,row+=bs) {
1617     for (j=a->i[i]; j<a->i[i+1]; j++) {
1618       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1619       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620       aa = a->a + j*bs2;
1621       for (k=0; k<bs; k++) {
1622         for (l=0; l<bs; l++) {
1623           if (PetscRealPart(*aa++) != 0.) continue;
1624           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625         }
1626       }
1627     }
1628   }
1629 
1630   color = PETSC_DRAW_RED;
1631   for (i=0,row=0; i<mbs; i++,row+=bs) {
1632     for (j=a->i[i]; j<a->i[i+1]; j++) {
1633       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635       aa = a->a + j*bs2;
1636       for (k=0; k<bs; k++) {
1637         for (l=0; l<bs; l++) {
1638           if (PetscRealPart(*aa++) <= 0.) continue;
1639           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640         }
1641       }
1642     }
1643   }
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1649 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1650 {
1651   PetscErrorCode ierr;
1652   PetscReal      xl,yl,xr,yr,w,h;
1653   PetscDraw      draw;
1654   PetscTruth     isnull;
1655 
1656   PetscFunctionBegin;
1657 
1658   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1659   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1660 
1661   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1662   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1663   xr += w;    yr += h;  xl = -w;     yl = -h;
1664   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1665   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1666   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatView_SeqBAIJ"
1672 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1673 {
1674   PetscErrorCode ierr;
1675   PetscTruth     iascii,isbinary,isdraw;
1676 
1677   PetscFunctionBegin;
1678   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1681   if (iascii){
1682     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1683   } else if (isbinary) {
1684     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1685   } else if (isdraw) {
1686     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1687   } else {
1688     Mat B;
1689     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1690     ierr = MatView(B,viewer);CHKERRQ(ierr);
1691     ierr = MatDestroy(B);CHKERRQ(ierr);
1692   }
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1699 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1700 {
1701   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1702   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1703   PetscInt    *ai = a->i,*ailen = a->ilen;
1704   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1705   MatScalar   *ap,*aa = a->a;
1706 
1707   PetscFunctionBegin;
1708   for (k=0; k<m; k++) { /* loop over rows */
1709     row  = im[k]; brow = row/bs;
1710     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1711     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1712     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1713     nrow = ailen[brow];
1714     for (l=0; l<n; l++) { /* loop over columns */
1715       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1716       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1717       col  = in[l] ;
1718       bcol = col/bs;
1719       cidx = col%bs;
1720       ridx = row%bs;
1721       high = nrow;
1722       low  = 0; /* assume unsorted */
1723       while (high-low > 5) {
1724         t = (low+high)/2;
1725         if (rp[t] > bcol) high = t;
1726         else             low  = t;
1727       }
1728       for (i=low; i<high; i++) {
1729         if (rp[i] > bcol) break;
1730         if (rp[i] == bcol) {
1731           *v++ = ap[bs2*i+bs*cidx+ridx];
1732           goto finished;
1733         }
1734       }
1735       *v++ = 0.0;
1736       finished:;
1737     }
1738   }
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #define CHUNKSIZE 10
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1745 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1746 {
1747   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1748   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1749   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1750   PetscErrorCode    ierr;
1751   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1752   PetscTruth        roworiented=a->roworiented;
1753   const PetscScalar *value = v;
1754   MatScalar         *ap,*aa = a->a,*bap;
1755 
1756   PetscFunctionBegin;
1757   if (roworiented) {
1758     stepval = (n-1)*bs;
1759   } else {
1760     stepval = (m-1)*bs;
1761   }
1762   for (k=0; k<m; k++) { /* loop over added rows */
1763     row  = im[k];
1764     if (row < 0) continue;
1765 #if defined(PETSC_USE_DEBUG)
1766     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1767 #endif
1768     rp   = aj + ai[row];
1769     ap   = aa + bs2*ai[row];
1770     rmax = imax[row];
1771     nrow = ailen[row];
1772     low  = 0;
1773     high = nrow;
1774     for (l=0; l<n; l++) { /* loop over added columns */
1775       if (in[l] < 0) continue;
1776 #if defined(PETSC_USE_DEBUG)
1777       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1778 #endif
1779       col = in[l];
1780       if (roworiented) {
1781         value = v + k*(stepval+bs)*bs + l*bs;
1782       } else {
1783         value = v + l*(stepval+bs)*bs + k*bs;
1784       }
1785       if (col <= lastcol) low = 0; else high = nrow;
1786       lastcol = col;
1787       while (high-low > 7) {
1788         t = (low+high)/2;
1789         if (rp[t] > col) high = t;
1790         else             low  = t;
1791       }
1792       for (i=low; i<high; i++) {
1793         if (rp[i] > col) break;
1794         if (rp[i] == col) {
1795           bap  = ap +  bs2*i;
1796           if (roworiented) {
1797             if (is == ADD_VALUES) {
1798               for (ii=0; ii<bs; ii++,value+=stepval) {
1799                 for (jj=ii; jj<bs2; jj+=bs) {
1800                   bap[jj] += *value++;
1801                 }
1802               }
1803             } else {
1804               for (ii=0; ii<bs; ii++,value+=stepval) {
1805                 for (jj=ii; jj<bs2; jj+=bs) {
1806                   bap[jj] = *value++;
1807                 }
1808               }
1809             }
1810           } else {
1811             if (is == ADD_VALUES) {
1812               for (ii=0; ii<bs; ii++,value+=stepval) {
1813                 for (jj=0; jj<bs; jj++) {
1814                   *bap++ += *value++;
1815                 }
1816               }
1817             } else {
1818               for (ii=0; ii<bs; ii++,value+=stepval) {
1819                 for (jj=0; jj<bs; jj++) {
1820                   *bap++  = *value++;
1821                 }
1822               }
1823             }
1824           }
1825           goto noinsert2;
1826         }
1827       }
1828       if (nonew == 1) goto noinsert2;
1829       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1830       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1831       N = nrow++ - 1; high++;
1832       /* shift up all the later entries in this row */
1833       for (ii=N; ii>=i; ii--) {
1834         rp[ii+1] = rp[ii];
1835         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1836       }
1837       if (N >= i) {
1838         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1839       }
1840       rp[i] = col;
1841       bap   = ap +  bs2*i;
1842       if (roworiented) {
1843         for (ii=0; ii<bs; ii++,value+=stepval) {
1844           for (jj=ii; jj<bs2; jj+=bs) {
1845             bap[jj] = *value++;
1846           }
1847         }
1848       } else {
1849         for (ii=0; ii<bs; ii++,value+=stepval) {
1850           for (jj=0; jj<bs; jj++) {
1851             *bap++  = *value++;
1852           }
1853         }
1854       }
1855       noinsert2:;
1856       low = i;
1857     }
1858     ailen[row] = nrow;
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1865 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1866 {
1867   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1868   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1869   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1870   PetscErrorCode ierr;
1871   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1872   MatScalar      *aa = a->a,*ap;
1873   PetscReal      ratio=0.6;
1874 
1875   PetscFunctionBegin;
1876   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1877 
1878   if (m) rmax = ailen[0];
1879   for (i=1; i<mbs; i++) {
1880     /* move each row back by the amount of empty slots (fshift) before it*/
1881     fshift += imax[i-1] - ailen[i-1];
1882     rmax   = PetscMax(rmax,ailen[i]);
1883     if (fshift) {
1884       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1885       N = ailen[i];
1886       for (j=0; j<N; j++) {
1887         ip[j-fshift] = ip[j];
1888         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1889       }
1890     }
1891     ai[i] = ai[i-1] + ailen[i-1];
1892   }
1893   if (mbs) {
1894     fshift += imax[mbs-1] - ailen[mbs-1];
1895     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1896   }
1897   /* reset ilen and imax for each row */
1898   for (i=0; i<mbs; i++) {
1899     ailen[i] = imax[i] = ai[i+1] - ai[i];
1900   }
1901   a->nz = ai[mbs];
1902 
1903   /* diagonals may have moved, so kill the diagonal pointers */
1904   a->idiagvalid = PETSC_FALSE;
1905   if (fshift && a->diag) {
1906     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1907     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1908     a->diag = 0;
1909   }
1910   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1911   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1912   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1913   a->reallocs          = 0;
1914   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1915 
1916   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1917   if (a->compressedrow.use){
1918     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1919   }
1920 
1921   A->same_nonzero = PETSC_TRUE;
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 /*
1926    This function returns an array of flags which indicate the locations of contiguous
1927    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1928    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1929    Assume: sizes should be long enough to hold all the values.
1930 */
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1933 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1934 {
1935   PetscInt   i,j,k,row;
1936   PetscTruth flg;
1937 
1938   PetscFunctionBegin;
1939   for (i=0,j=0; i<n; j++) {
1940     row = idx[i];
1941     if (row%bs!=0) { /* Not the begining of a block */
1942       sizes[j] = 1;
1943       i++;
1944     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1945       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1946       i++;
1947     } else { /* Begining of the block, so check if the complete block exists */
1948       flg = PETSC_TRUE;
1949       for (k=1; k<bs; k++) {
1950         if (row+k != idx[i+k]) { /* break in the block */
1951           flg = PETSC_FALSE;
1952           break;
1953         }
1954       }
1955       if (flg) { /* No break in the bs */
1956         sizes[j] = bs;
1957         i+= bs;
1958       } else {
1959         sizes[j] = 1;
1960         i++;
1961       }
1962     }
1963   }
1964   *bs_max = j;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1970 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1971 {
1972   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1973   PetscErrorCode ierr;
1974   PetscInt       i,j,k,count,*rows;
1975   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1976   PetscScalar    zero = 0.0;
1977   MatScalar      *aa;
1978 
1979   PetscFunctionBegin;
1980   /* Make a copy of the IS and  sort it */
1981   /* allocate memory for rows,sizes */
1982   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1983   sizes = rows + is_n;
1984 
1985   /* copy IS values to rows, and sort them */
1986   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1987   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1988   if (baij->keepzeroedrows) {
1989     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1990     bs_max = is_n;
1991     A->same_nonzero = PETSC_TRUE;
1992   } else {
1993     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1994     A->same_nonzero = PETSC_FALSE;
1995   }
1996 
1997   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1998     row   = rows[j];
1999     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2000     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2001     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2002     if (sizes[i] == bs && !baij->keepzeroedrows) {
2003       if (diag != 0.0) {
2004         if (baij->ilen[row/bs] > 0) {
2005           baij->ilen[row/bs]       = 1;
2006           baij->j[baij->i[row/bs]] = row/bs;
2007           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2008         }
2009         /* Now insert all the diagonal values for this bs */
2010         for (k=0; k<bs; k++) {
2011           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2012         }
2013       } else { /* (diag == 0.0) */
2014         baij->ilen[row/bs] = 0;
2015       } /* end (diag == 0.0) */
2016     } else { /* (sizes[i] != bs) */
2017 #if defined (PETSC_USE_DEBUG)
2018       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2019 #endif
2020       for (k=0; k<count; k++) {
2021         aa[0] =  zero;
2022         aa    += bs;
2023       }
2024       if (diag != 0.0) {
2025         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2026       }
2027     }
2028   }
2029 
2030   ierr = PetscFree(rows);CHKERRQ(ierr);
2031   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2037 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2038 {
2039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2040   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2041   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2042   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2043   PetscErrorCode ierr;
2044   PetscInt       ridx,cidx,bs2=a->bs2;
2045   PetscTruth     roworiented=a->roworiented;
2046   MatScalar      *ap,value,*aa=a->a,*bap;
2047 
2048   PetscFunctionBegin;
2049   for (k=0; k<m; k++) { /* loop over added rows */
2050     row  = im[k];
2051     brow = row/bs;
2052     if (row < 0) continue;
2053 #if defined(PETSC_USE_DEBUG)
2054     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2055 #endif
2056     rp   = aj + ai[brow];
2057     ap   = aa + bs2*ai[brow];
2058     rmax = imax[brow];
2059     nrow = ailen[brow];
2060     low  = 0;
2061     high = nrow;
2062     for (l=0; l<n; l++) { /* loop over added columns */
2063       if (in[l] < 0) continue;
2064 #if defined(PETSC_USE_DEBUG)
2065       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2066 #endif
2067       col = in[l]; bcol = col/bs;
2068       ridx = row % bs; cidx = col % bs;
2069       if (roworiented) {
2070         value = v[l + k*n];
2071       } else {
2072         value = v[k + l*m];
2073       }
2074       if (col <= lastcol) low = 0; else high = nrow;
2075       lastcol = col;
2076       while (high-low > 7) {
2077         t = (low+high)/2;
2078         if (rp[t] > bcol) high = t;
2079         else              low  = t;
2080       }
2081       for (i=low; i<high; i++) {
2082         if (rp[i] > bcol) break;
2083         if (rp[i] == bcol) {
2084           bap  = ap +  bs2*i + bs*cidx + ridx;
2085           if (is == ADD_VALUES) *bap += value;
2086           else                  *bap  = value;
2087           goto noinsert1;
2088         }
2089       }
2090       if (nonew == 1) goto noinsert1;
2091       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2092       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2093       N = nrow++ - 1; high++;
2094       /* shift up all the later entries in this row */
2095       for (ii=N; ii>=i; ii--) {
2096         rp[ii+1] = rp[ii];
2097         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2098       }
2099       if (N>=i) {
2100         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2101       }
2102       rp[i]                      = bcol;
2103       ap[bs2*i + bs*cidx + ridx] = value;
2104       a->nz++;
2105       noinsert1:;
2106       low = i;
2107     }
2108     ailen[brow] = nrow;
2109   }
2110   A->same_nonzero = PETSC_FALSE;
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2115 
2116 #undef __FUNCT__
2117 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2118 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2119 {
2120   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2121   Mat            outA;
2122   PetscErrorCode ierr;
2123   PetscTruth     row_identity,col_identity;
2124 
2125   PetscFunctionBegin;
2126   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2127   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2128   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2129   if (!row_identity || !col_identity) {
2130     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2131   }
2132 
2133   outA          = inA;
2134   inA->factor   = MAT_FACTOR_LU;
2135 
2136   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2137 
2138   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2139   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2140   a->row = row;
2141   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2142   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2143   a->col = col;
2144 
2145   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2146   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2148 
2149   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2150   if (!a->solve_work) {
2151     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2152     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2153   }
2154   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2155 
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 EXTERN_C_BEGIN
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2162 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2163 {
2164   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2165   PetscInt    i,nz,nbs;
2166 
2167   PetscFunctionBegin;
2168   nz  = baij->maxnz/baij->bs2;
2169   nbs = baij->nbs;
2170   for (i=0; i<nz; i++) {
2171     baij->j[i] = indices[i];
2172   }
2173   baij->nz = nz;
2174   for (i=0; i<nbs; i++) {
2175     baij->ilen[i] = baij->imax[i];
2176   }
2177 
2178   PetscFunctionReturn(0);
2179 }
2180 EXTERN_C_END
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2184 /*@
2185     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2186        in the matrix.
2187 
2188   Input Parameters:
2189 +  mat - the SeqBAIJ matrix
2190 -  indices - the column indices
2191 
2192   Level: advanced
2193 
2194   Notes:
2195     This can be called if you have precomputed the nonzero structure of the
2196   matrix and want to provide it to the matrix object to improve the performance
2197   of the MatSetValues() operation.
2198 
2199     You MUST have set the correct numbers of nonzeros per row in the call to
2200   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2201 
2202     MUST be called before any calls to MatSetValues();
2203 
2204 @*/
2205 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2206 {
2207   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2208 
2209   PetscFunctionBegin;
2210   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2211   PetscValidPointer(indices,2);
2212   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2213   if (f) {
2214     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2215   } else {
2216     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2223 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2224 {
2225   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2226   PetscErrorCode ierr;
2227   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2228   PetscReal      atmp;
2229   PetscScalar    *x,zero = 0.0;
2230   MatScalar      *aa;
2231   PetscInt       ncols,brow,krow,kcol;
2232 
2233   PetscFunctionBegin;
2234   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2235   bs   = A->rmap->bs;
2236   aa   = a->a;
2237   ai   = a->i;
2238   aj   = a->j;
2239   mbs  = a->mbs;
2240 
2241   ierr = VecSet(v,zero);CHKERRQ(ierr);
2242   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2243   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2244   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2245   for (i=0; i<mbs; i++) {
2246     ncols = ai[1] - ai[0]; ai++;
2247     brow  = bs*i;
2248     for (j=0; j<ncols; j++){
2249       for (kcol=0; kcol<bs; kcol++){
2250         for (krow=0; krow<bs; krow++){
2251           atmp = PetscAbsScalar(*aa);aa++;
2252           row = brow + krow;    /* row index */
2253           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2254           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2255         }
2256       }
2257       aj++;
2258     }
2259   }
2260   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2261   PetscFunctionReturn(0);
2262 }
2263 
2264 #undef __FUNCT__
2265 #define __FUNCT__ "MatCopy_SeqBAIJ"
2266 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2267 {
2268   PetscErrorCode ierr;
2269 
2270   PetscFunctionBegin;
2271   /* If the two matrices have the same copy implementation, use fast copy. */
2272   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2273     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2274     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2275 
2276     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2277       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2278     }
2279     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2280   } else {
2281     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2282   }
2283   PetscFunctionReturn(0);
2284 }
2285 
2286 #undef __FUNCT__
2287 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2288 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2289 {
2290   PetscErrorCode ierr;
2291 
2292   PetscFunctionBegin;
2293   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2294   PetscFunctionReturn(0);
2295 }
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2299 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2300 {
2301   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2302   PetscFunctionBegin;
2303   *array = a->a;
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2309 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2310 {
2311   PetscFunctionBegin;
2312   PetscFunctionReturn(0);
2313 }
2314 
2315 #include "petscblaslapack.h"
2316 #undef __FUNCT__
2317 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2318 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2319 {
2320   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2321   PetscErrorCode ierr;
2322   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2323   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2324 
2325   PetscFunctionBegin;
2326   if (str == SAME_NONZERO_PATTERN) {
2327     PetscScalar alpha = a;
2328     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2329   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2330     if (y->xtoy && y->XtoY != X) {
2331       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2332       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2333     }
2334     if (!y->xtoy) { /* get xtoy */
2335       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2336       y->XtoY = X;
2337     }
2338     bs2 = bs*bs;
2339     for (i=0; i<x->nz; i++) {
2340       j = 0;
2341       while (j < bs2){
2342         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2343         j++;
2344       }
2345     }
2346     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2347   } else {
2348     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2349   }
2350   PetscFunctionReturn(0);
2351 }
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2355 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2356 {
2357   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2358   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2359   MatScalar      *aa = a->a;
2360 
2361   PetscFunctionBegin;
2362   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2363   PetscFunctionReturn(0);
2364 }
2365 
2366 #undef __FUNCT__
2367 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2368 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2369 {
2370   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2371   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2372   MatScalar      *aa = a->a;
2373 
2374   PetscFunctionBegin;
2375   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 
2380 /* -------------------------------------------------------------------*/
2381 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2382        MatGetRow_SeqBAIJ,
2383        MatRestoreRow_SeqBAIJ,
2384        MatMult_SeqBAIJ_N,
2385 /* 4*/ MatMultAdd_SeqBAIJ_N,
2386        MatMultTranspose_SeqBAIJ,
2387        MatMultTransposeAdd_SeqBAIJ,
2388        0,
2389        0,
2390        0,
2391 /*10*/ 0,
2392        MatLUFactor_SeqBAIJ,
2393        0,
2394        0,
2395        MatTranspose_SeqBAIJ,
2396 /*15*/ MatGetInfo_SeqBAIJ,
2397        MatEqual_SeqBAIJ,
2398        MatGetDiagonal_SeqBAIJ,
2399        MatDiagonalScale_SeqBAIJ,
2400        MatNorm_SeqBAIJ,
2401 /*20*/ 0,
2402        MatAssemblyEnd_SeqBAIJ,
2403        0,
2404        MatSetOption_SeqBAIJ,
2405        MatZeroEntries_SeqBAIJ,
2406 /*25*/ MatZeroRows_SeqBAIJ,
2407        0,
2408        0,
2409        0,
2410        0,
2411 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2412        0,
2413        0,
2414        MatGetArray_SeqBAIJ,
2415        MatRestoreArray_SeqBAIJ,
2416 /*35*/ MatDuplicate_SeqBAIJ,
2417        0,
2418        0,
2419        MatILUFactor_SeqBAIJ,
2420        0,
2421 /*40*/ MatAXPY_SeqBAIJ,
2422        MatGetSubMatrices_SeqBAIJ,
2423        MatIncreaseOverlap_SeqBAIJ,
2424        MatGetValues_SeqBAIJ,
2425        MatCopy_SeqBAIJ,
2426 /*45*/ 0,
2427        MatScale_SeqBAIJ,
2428        0,
2429        0,
2430        0,
2431 /*50*/ 0,
2432        MatGetRowIJ_SeqBAIJ,
2433        MatRestoreRowIJ_SeqBAIJ,
2434        0,
2435        0,
2436 /*55*/ 0,
2437        0,
2438        0,
2439        0,
2440        MatSetValuesBlocked_SeqBAIJ,
2441 /*60*/ MatGetSubMatrix_SeqBAIJ,
2442        MatDestroy_SeqBAIJ,
2443        MatView_SeqBAIJ,
2444        0,
2445        0,
2446 /*65*/ 0,
2447        0,
2448        0,
2449        0,
2450        0,
2451 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2452        MatConvert_Basic,
2453        0,
2454        0,
2455        0,
2456 /*75*/ 0,
2457        0,
2458        0,
2459        0,
2460        0,
2461 /*80*/ 0,
2462        0,
2463        0,
2464        0,
2465        MatLoad_SeqBAIJ,
2466 /*85*/ 0,
2467        0,
2468        0,
2469        0,
2470        0,
2471 /*90*/ 0,
2472        0,
2473        0,
2474        0,
2475        0,
2476 /*95*/ 0,
2477        0,
2478        0,
2479        0,
2480        0,
2481 /*100*/0,
2482        0,
2483        0,
2484        0,
2485        0,
2486 /*105*/0,
2487        MatRealPart_SeqBAIJ,
2488        MatImaginaryPart_SeqBAIJ,
2489        0,
2490        0,
2491 /*110*/0,
2492        0,
2493        0,
2494        0,
2495        MatMissingDiagonal_SeqBAIJ
2496 /*115*/
2497 };
2498 
2499 EXTERN_C_BEGIN
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2502 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2503 {
2504   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2505   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2506   PetscErrorCode ierr;
2507 
2508   PetscFunctionBegin;
2509   if (aij->nonew != 1) {
2510     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2511   }
2512 
2513   /* allocate space for values if not already there */
2514   if (!aij->saved_values) {
2515     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2516   }
2517 
2518   /* copy values over */
2519   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2520   PetscFunctionReturn(0);
2521 }
2522 EXTERN_C_END
2523 
2524 EXTERN_C_BEGIN
2525 #undef __FUNCT__
2526 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2527 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2528 {
2529   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2530   PetscErrorCode ierr;
2531   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2532 
2533   PetscFunctionBegin;
2534   if (aij->nonew != 1) {
2535     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2536   }
2537   if (!aij->saved_values) {
2538     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2539   }
2540 
2541   /* copy values over */
2542   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 EXTERN_C_END
2546 
2547 EXTERN_C_BEGIN
2548 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2549 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2550 EXTERN_C_END
2551 
2552 EXTERN_C_BEGIN
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2555 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2556 {
2557   Mat_SeqBAIJ    *b;
2558   PetscErrorCode ierr;
2559   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2560   PetscTruth     flg,skipallocation = PETSC_FALSE;
2561 
2562   PetscFunctionBegin;
2563 
2564   if (nz == MAT_SKIP_ALLOCATION) {
2565     skipallocation = PETSC_TRUE;
2566     nz             = 0;
2567   }
2568 
2569   if (bs < 0) {
2570     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2571       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2572     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2573     bs   = PetscAbs(bs);
2574   }
2575   if (nnz && newbs != bs) {
2576     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2577   }
2578   bs   = newbs;
2579 
2580   B->rmap->bs = B->cmap->bs = bs;
2581   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2582   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2583 
2584   B->preallocated = PETSC_TRUE;
2585 
2586   mbs  = B->rmap->n/bs;
2587   nbs  = B->cmap->n/bs;
2588   bs2  = bs*bs;
2589 
2590   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2591     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2592   }
2593 
2594   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2595   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2596   if (nnz) {
2597     for (i=0; i<mbs; i++) {
2598       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2599       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2600     }
2601   }
2602 
2603   b       = (Mat_SeqBAIJ*)B->data;
2604   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2605     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2606   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2607 
2608   if (!flg) {
2609     switch (bs) {
2610     case 1:
2611       B->ops->mult            = MatMult_SeqBAIJ_1;
2612       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2613       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2614       break;
2615     case 2:
2616       B->ops->mult            = MatMult_SeqBAIJ_2;
2617       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2618       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2619       break;
2620     case 3:
2621       B->ops->mult            = MatMult_SeqBAIJ_3;
2622       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2623       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2624       break;
2625     case 4:
2626       B->ops->mult            = MatMult_SeqBAIJ_4;
2627       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2628       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2629       break;
2630     case 5:
2631       B->ops->mult            = MatMult_SeqBAIJ_5;
2632       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2633       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2634       break;
2635     case 6:
2636       B->ops->mult            = MatMult_SeqBAIJ_6;
2637       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2638       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2639       break;
2640     case 7:
2641       B->ops->mult            = MatMult_SeqBAIJ_7;
2642       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2643       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2644       break;
2645     default:
2646       B->ops->mult            = MatMult_SeqBAIJ_N;
2647       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2648       break;
2649     }
2650   }
2651   B->rmap->bs      = bs;
2652   b->mbs     = mbs;
2653   b->nbs     = nbs;
2654   if (!skipallocation) {
2655     if (!b->imax) {
2656       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2657     }
2658     /* b->ilen will count nonzeros in each block row so far. */
2659     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2660     if (!nnz) {
2661       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2662       else if (nz <= 0)        nz = 1;
2663       for (i=0; i<mbs; i++) b->imax[i] = nz;
2664       nz = nz*mbs;
2665     } else {
2666       nz = 0;
2667       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2668     }
2669 
2670     /* allocate the matrix space */
2671     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2672     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2673     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2674     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2675     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2676     b->singlemalloc = PETSC_TRUE;
2677     b->i[0] = 0;
2678     for (i=1; i<mbs+1; i++) {
2679       b->i[i] = b->i[i-1] + b->imax[i-1];
2680     }
2681     b->free_a     = PETSC_TRUE;
2682     b->free_ij    = PETSC_TRUE;
2683   } else {
2684     b->free_a     = PETSC_FALSE;
2685     b->free_ij    = PETSC_FALSE;
2686   }
2687 
2688   B->rmap->bs          = bs;
2689   b->bs2              = bs2;
2690   b->mbs              = mbs;
2691   b->nz               = 0;
2692   b->maxnz            = nz*bs2;
2693   B->info.nz_unneeded = (PetscReal)b->maxnz;
2694   PetscFunctionReturn(0);
2695 }
2696 EXTERN_C_END
2697 
2698 EXTERN_C_BEGIN
2699 #undef __FUNCT__
2700 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2701 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2702 {
2703   PetscInt       i,m,nz,nz_max=0,*nnz;
2704   PetscScalar    *values=0;
2705   PetscErrorCode ierr;
2706 
2707   PetscFunctionBegin;
2708 
2709   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2710   B->rmap->bs = bs;
2711   B->cmap->bs = bs;
2712   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2713   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2714   m = B->rmap->n/bs;
2715 
2716   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2717   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2718   for(i=0; i<m; i++) {
2719     nz = ii[i+1]- ii[i];
2720     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2721     nz_max = PetscMax(nz_max, nz);
2722     nnz[i] = nz;
2723   }
2724   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2725   ierr = PetscFree(nnz);CHKERRQ(ierr);
2726 
2727   values = (PetscScalar*)V;
2728   if (!values) {
2729     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2730     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2731   }
2732   for (i=0; i<m; i++) {
2733     PetscInt          ncols  = ii[i+1] - ii[i];
2734     const PetscInt    *icols = jj + ii[i];
2735     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2736     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2737   }
2738   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2739   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2740   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2741 
2742   PetscFunctionReturn(0);
2743 }
2744 EXTERN_C_END
2745 
2746 
2747 EXTERN_C_BEGIN
2748 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2749 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2750 EXTERN_C_END
2751 
2752 /*MC
2753    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2754    block sparse compressed row format.
2755 
2756    Options Database Keys:
2757 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2758 
2759   Level: beginner
2760 
2761 .seealso: MatCreateSeqBAIJ()
2762 M*/
2763 
2764 
2765 EXTERN_C_BEGIN
2766 #undef __FUNCT__
2767 #define __FUNCT__ "MatCreate_SeqBAIJ"
2768 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2769 {
2770   PetscErrorCode ierr;
2771   PetscMPIInt    size;
2772   Mat_SeqBAIJ    *b;
2773 
2774   PetscFunctionBegin;
2775   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2776   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2777 
2778   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2779   B->data = (void*)b;
2780   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2781   B->mapping          = 0;
2782   b->row              = 0;
2783   b->col              = 0;
2784   b->icol             = 0;
2785   b->reallocs         = 0;
2786   b->saved_values     = 0;
2787 
2788   b->roworiented      = PETSC_TRUE;
2789   b->nonew            = 0;
2790   b->diag             = 0;
2791   b->solve_work       = 0;
2792   b->mult_work        = 0;
2793   B->spptr            = 0;
2794   B->info.nz_unneeded = (PetscReal)b->maxnz;
2795   b->keepzeroedrows   = PETSC_FALSE;
2796   b->xtoy              = 0;
2797   b->XtoY              = 0;
2798   b->compressedrow.use     = PETSC_FALSE;
2799   b->compressedrow.nrows   = 0;
2800   b->compressedrow.i       = PETSC_NULL;
2801   b->compressedrow.rindex  = PETSC_NULL;
2802   b->compressedrow.checked = PETSC_FALSE;
2803   B->same_nonzero          = PETSC_FALSE;
2804 
2805   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2806                                      "MatGetFactorAvailable_seqbaij_petsc",
2807                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2808   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2809                                      "MatGetFactor_seqbaij_petsc",
2810                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2812                                      "MatInvertBlockDiagonal_SeqBAIJ",
2813                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2815                                      "MatStoreValues_SeqBAIJ",
2816                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2818                                      "MatRetrieveValues_SeqBAIJ",
2819                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2821                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2822                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2824                                      "MatConvert_SeqBAIJ_SeqAIJ",
2825                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2827                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2828                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2830                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2831                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2833                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2834                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2835   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2836   PetscFunctionReturn(0);
2837 }
2838 EXTERN_C_END
2839 
2840 #undef __FUNCT__
2841 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2842 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2843 {
2844   Mat            C = *B;
2845   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2846   PetscErrorCode ierr;
2847   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2848 
2849   PetscFunctionBegin;
2850   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2851 
2852   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2853   for (i=0; i<mbs; i++) {
2854     c->imax[i] = a->imax[i];
2855     c->ilen[i] = a->ilen[i];
2856   }
2857 
2858   /* allocate the matrix space */
2859   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2860   c->singlemalloc = PETSC_TRUE;
2861   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2862   if (mbs > 0) {
2863     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2864     if (cpvalues == MAT_COPY_VALUES) {
2865       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2866     } else {
2867       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2868     }
2869   }
2870   c->roworiented = a->roworiented;
2871   c->nonew       = a->nonew;
2872   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2873   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2874   c->bs2         = a->bs2;
2875   c->mbs         = a->mbs;
2876   c->nbs         = a->nbs;
2877 
2878   if (a->diag) {
2879     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2880     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2881     for (i=0; i<mbs; i++) {
2882       c->diag[i] = a->diag[i];
2883     }
2884   } else c->diag        = 0;
2885   c->nz                 = a->nz;
2886   c->maxnz              = a->maxnz;
2887   c->solve_work         = 0;
2888   c->mult_work          = 0;
2889   c->free_a             = PETSC_TRUE;
2890   c->free_ij            = PETSC_TRUE;
2891   C->preallocated       = PETSC_TRUE;
2892   C->assembled          = PETSC_TRUE;
2893 
2894   c->compressedrow.use     = a->compressedrow.use;
2895   c->compressedrow.nrows   = a->compressedrow.nrows;
2896   c->compressedrow.checked = a->compressedrow.checked;
2897   if ( a->compressedrow.checked && a->compressedrow.use){
2898     i = a->compressedrow.nrows;
2899     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2900     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2901     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2902     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2903   } else {
2904     c->compressedrow.use    = PETSC_FALSE;
2905     c->compressedrow.i      = PETSC_NULL;
2906     c->compressedrow.rindex = PETSC_NULL;
2907   }
2908   C->same_nonzero = A->same_nonzero;
2909   *B = C;
2910   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2911   PetscFunctionReturn(0);
2912 }
2913 
2914 #undef __FUNCT__
2915 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2916 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2917 {
2918     PetscErrorCode ierr;
2919 
2920   PetscFunctionBegin;
2921   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2922   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2923   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2924   ierr = MatDuplicateNoCreate_SeqBAIJ(A,cpvalues,B);
2925   PetscFunctionReturn(0);
2926 }
2927 
2928 #undef __FUNCT__
2929 #define __FUNCT__ "MatLoad_SeqBAIJ"
2930 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2931 {
2932   Mat_SeqBAIJ    *a;
2933   Mat            B;
2934   PetscErrorCode ierr;
2935   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2936   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2937   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2938   PetscInt       *masked,nmask,tmp,bs2,ishift;
2939   PetscMPIInt    size;
2940   int            fd;
2941   PetscScalar    *aa;
2942   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2943 
2944   PetscFunctionBegin;
2945   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2946     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2947   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2948   bs2  = bs*bs;
2949 
2950   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2951   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2952   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2953   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2954   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2955   M = header[1]; N = header[2]; nz = header[3];
2956 
2957   if (header[3] < 0) {
2958     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2959   }
2960 
2961   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2962 
2963   /*
2964      This code adds extra rows to make sure the number of rows is
2965     divisible by the blocksize
2966   */
2967   mbs        = M/bs;
2968   extra_rows = bs - M + bs*(mbs);
2969   if (extra_rows == bs) extra_rows = 0;
2970   else                  mbs++;
2971   if (extra_rows) {
2972     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2973   }
2974 
2975   /* read in row lengths */
2976   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2977   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2978   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2979 
2980   /* read in column indices */
2981   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2982   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2983   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2984 
2985   /* loop over row lengths determining block row lengths */
2986   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2987   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2988   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2989   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2990   masked   = mask + mbs;
2991   rowcount = 0; nzcount = 0;
2992   for (i=0; i<mbs; i++) {
2993     nmask = 0;
2994     for (j=0; j<bs; j++) {
2995       kmax = rowlengths[rowcount];
2996       for (k=0; k<kmax; k++) {
2997         tmp = jj[nzcount++]/bs;
2998         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2999       }
3000       rowcount++;
3001     }
3002     browlengths[i] += nmask;
3003     /* zero out the mask elements we set */
3004     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3005   }
3006 
3007   /* create our matrix */
3008   ierr = MatCreate(comm,&B);
3009   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3010   ierr = MatSetType(B,type);CHKERRQ(ierr);
3011   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3012   a = (Mat_SeqBAIJ*)B->data;
3013 
3014   /* set matrix "i" values */
3015   a->i[0] = 0;
3016   for (i=1; i<= mbs; i++) {
3017     a->i[i]      = a->i[i-1] + browlengths[i-1];
3018     a->ilen[i-1] = browlengths[i-1];
3019   }
3020   a->nz         = 0;
3021   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3022 
3023   /* read in nonzero values */
3024   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3025   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3026   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3027 
3028   /* set "a" and "j" values into matrix */
3029   nzcount = 0; jcount = 0;
3030   for (i=0; i<mbs; i++) {
3031     nzcountb = nzcount;
3032     nmask    = 0;
3033     for (j=0; j<bs; j++) {
3034       kmax = rowlengths[i*bs+j];
3035       for (k=0; k<kmax; k++) {
3036         tmp = jj[nzcount++]/bs;
3037 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3038       }
3039     }
3040     /* sort the masked values */
3041     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3042 
3043     /* set "j" values into matrix */
3044     maskcount = 1;
3045     for (j=0; j<nmask; j++) {
3046       a->j[jcount++]  = masked[j];
3047       mask[masked[j]] = maskcount++;
3048     }
3049     /* set "a" values into matrix */
3050     ishift = bs2*a->i[i];
3051     for (j=0; j<bs; j++) {
3052       kmax = rowlengths[i*bs+j];
3053       for (k=0; k<kmax; k++) {
3054         tmp       = jj[nzcountb]/bs ;
3055         block     = mask[tmp] - 1;
3056         point     = jj[nzcountb] - bs*tmp;
3057         idx       = ishift + bs2*block + j + bs*point;
3058         a->a[idx] = (MatScalar)aa[nzcountb++];
3059       }
3060     }
3061     /* zero out the mask elements we set */
3062     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3063   }
3064   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3065 
3066   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3067   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3068   ierr = PetscFree(aa);CHKERRQ(ierr);
3069   ierr = PetscFree(jj);CHKERRQ(ierr);
3070   ierr = PetscFree(mask);CHKERRQ(ierr);
3071 
3072   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3073   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3074   ierr = MatView_Private(B);CHKERRQ(ierr);
3075 
3076   *A = B;
3077   PetscFunctionReturn(0);
3078 }
3079 
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatCreateSeqBAIJ"
3082 /*@C
3083    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3084    compressed row) format.  For good matrix assembly performance the
3085    user should preallocate the matrix storage by setting the parameter nz
3086    (or the array nnz).  By setting these parameters accurately, performance
3087    during matrix assembly can be increased by more than a factor of 50.
3088 
3089    Collective on MPI_Comm
3090 
3091    Input Parameters:
3092 +  comm - MPI communicator, set to PETSC_COMM_SELF
3093 .  bs - size of block
3094 .  m - number of rows
3095 .  n - number of columns
3096 .  nz - number of nonzero blocks  per block row (same for all rows)
3097 -  nnz - array containing the number of nonzero blocks in the various block rows
3098          (possibly different for each block row) or PETSC_NULL
3099 
3100    Output Parameter:
3101 .  A - the matrix
3102 
3103    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3104    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3105    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3106    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3107 
3108    Options Database Keys:
3109 .   -mat_no_unroll - uses code that does not unroll the loops in the
3110                      block calculations (much slower)
3111 .    -mat_block_size - size of the blocks to use
3112 
3113    Level: intermediate
3114 
3115    Notes:
3116    The number of rows and columns must be divisible by blocksize.
3117 
3118    If the nnz parameter is given then the nz parameter is ignored
3119 
3120    A nonzero block is any block that as 1 or more nonzeros in it
3121 
3122    The block AIJ format is fully compatible with standard Fortran 77
3123    storage.  That is, the stored row and column indices can begin at
3124    either one (as in Fortran) or zero.  See the users' manual for details.
3125 
3126    Specify the preallocated storage with either nz or nnz (not both).
3127    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3128    allocation.  For additional details, see the users manual chapter on
3129    matrices.
3130 
3131 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3132 @*/
3133 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3134 {
3135   PetscErrorCode ierr;
3136 
3137   PetscFunctionBegin;
3138   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3139   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3140   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3141   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3142   PetscFunctionReturn(0);
3143 }
3144 
3145 #undef __FUNCT__
3146 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3147 /*@C
3148    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3149    per row in the matrix. For good matrix assembly performance the
3150    user should preallocate the matrix storage by setting the parameter nz
3151    (or the array nnz).  By setting these parameters accurately, performance
3152    during matrix assembly can be increased by more than a factor of 50.
3153 
3154    Collective on MPI_Comm
3155 
3156    Input Parameters:
3157 +  A - the matrix
3158 .  bs - size of block
3159 .  nz - number of block nonzeros per block row (same for all rows)
3160 -  nnz - array containing the number of block nonzeros in the various block rows
3161          (possibly different for each block row) or PETSC_NULL
3162 
3163    Options Database Keys:
3164 .   -mat_no_unroll - uses code that does not unroll the loops in the
3165                      block calculations (much slower)
3166 .    -mat_block_size - size of the blocks to use
3167 
3168    Level: intermediate
3169 
3170    Notes:
3171    If the nnz parameter is given then the nz parameter is ignored
3172 
3173    You can call MatGetInfo() to get information on how effective the preallocation was;
3174    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3175    You can also run with the option -info and look for messages with the string
3176    malloc in them to see if additional memory allocation was needed.
3177 
3178    The block AIJ format is fully compatible with standard Fortran 77
3179    storage.  That is, the stored row and column indices can begin at
3180    either one (as in Fortran) or zero.  See the users' manual for details.
3181 
3182    Specify the preallocated storage with either nz or nnz (not both).
3183    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3184    allocation.  For additional details, see the users manual chapter on
3185    matrices.
3186 
3187 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3188 @*/
3189 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3190 {
3191   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3192 
3193   PetscFunctionBegin;
3194   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3195   if (f) {
3196     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3197   }
3198   PetscFunctionReturn(0);
3199 }
3200 
3201 #undef __FUNCT__
3202 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3203 /*@C
3204    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3205    (the default sequential PETSc format).
3206 
3207    Collective on MPI_Comm
3208 
3209    Input Parameters:
3210 +  A - the matrix
3211 .  i - the indices into j for the start of each local row (starts with zero)
3212 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3213 -  v - optional values in the matrix
3214 
3215    Level: developer
3216 
3217 .keywords: matrix, aij, compressed row, sparse
3218 
3219 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3220 @*/
3221 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3222 {
3223   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3224 
3225   PetscFunctionBegin;
3226   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3227   if (f) {
3228     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3229   }
3230   PetscFunctionReturn(0);
3231 }
3232 
3233 
3234 #undef __FUNCT__
3235 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3236 /*@
3237      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3238               (upper triangular entries in CSR format) provided by the user.
3239 
3240      Collective on MPI_Comm
3241 
3242    Input Parameters:
3243 +  comm - must be an MPI communicator of size 1
3244 .  bs - size of block
3245 .  m - number of rows
3246 .  n - number of columns
3247 .  i - row indices
3248 .  j - column indices
3249 -  a - matrix values
3250 
3251    Output Parameter:
3252 .  mat - the matrix
3253 
3254    Level: intermediate
3255 
3256    Notes:
3257        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3258     once the matrix is destroyed
3259 
3260        You cannot set new nonzero locations into this matrix, that will generate an error.
3261 
3262        The i and j indices are 0 based
3263 
3264 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3265 
3266 @*/
3267 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3268 {
3269   PetscErrorCode ierr;
3270   PetscInt       ii;
3271   Mat_SeqBAIJ    *baij;
3272 
3273   PetscFunctionBegin;
3274   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3275   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3276 
3277   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3278   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3279   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3280   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3281   baij = (Mat_SeqBAIJ*)(*mat)->data;
3282   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3283 
3284   baij->i = i;
3285   baij->j = j;
3286   baij->a = a;
3287   baij->singlemalloc = PETSC_FALSE;
3288   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3289   baij->free_a       = PETSC_FALSE;
3290   baij->free_ij       = PETSC_FALSE;
3291 
3292   for (ii=0; ii<m; ii++) {
3293     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3294 #if defined(PETSC_USE_DEBUG)
3295     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3296 #endif
3297   }
3298 #if defined(PETSC_USE_DEBUG)
3299   for (ii=0; ii<baij->i[m]; ii++) {
3300     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3301     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3302   }
3303 #endif
3304 
3305   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3306   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3307   PetscFunctionReturn(0);
3308 }
3309