xref: /petsc/src/mat/impls/baij/seq/baij.c (revision ce0a2cd1da0658c2b28aad1be2e2c8e41567bece)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap.bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   ierr = PetscFree(a);CHKERRQ(ierr);
1264 
1265   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1266   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1278 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282 
1283   PetscFunctionBegin;
1284   switch (op) {
1285   case MAT_ROW_ORIENTED:
1286     a->roworiented    = flg;
1287     break;
1288   case MAT_KEEP_ZEROED_ROWS:
1289     a->keepzeroedrows = flg;
1290     break;
1291   case MAT_NEW_NONZERO_LOCATIONS:
1292     a->nonew          = (flg ? 0 : 1);
1293     break;
1294   case MAT_NEW_NONZERO_LOCATION_ERR:
1295     a->nonew          = (flg ? -1 : 0);
1296     break;
1297   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1298     a->nonew          = (flg ? -2 : 0);
1299     break;
1300   case MAT_NEW_DIAGONALS:
1301   case MAT_IGNORE_OFF_PROC_ENTRIES:
1302   case MAT_USE_HASH_TABLE:
1303     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1304     break;
1305   case MAT_SYMMETRIC:
1306   case MAT_STRUCTURALLY_SYMMETRIC:
1307   case MAT_HERMITIAN:
1308   case MAT_SYMMETRY_ETERNAL:
1309     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1310     break;
1311   default:
1312     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1313   }
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1319 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1320 {
1321   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1324   MatScalar      *aa,*aa_i;
1325   PetscScalar    *v_i;
1326 
1327   PetscFunctionBegin;
1328   bs  = A->rmap.bs;
1329   ai  = a->i;
1330   aj  = a->j;
1331   aa  = a->a;
1332   bs2 = a->bs2;
1333 
1334   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1335 
1336   bn  = row/bs;   /* Block number */
1337   bp  = row % bs; /* Block Position */
1338   M   = ai[bn+1] - ai[bn];
1339   *nz = bs*M;
1340 
1341   if (v) {
1342     *v = 0;
1343     if (*nz) {
1344       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1345       for (i=0; i<M; i++) { /* for each block in the block row */
1346         v_i  = *v + i*bs;
1347         aa_i = aa + bs2*(ai[bn] + i);
1348         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1349       }
1350     }
1351   }
1352 
1353   if (idx) {
1354     *idx = 0;
1355     if (*nz) {
1356       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1357       for (i=0; i<M; i++) { /* for each block in the block row */
1358         idx_i = *idx + i*bs;
1359         itmp  = bs*aj[ai[bn] + i];
1360         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1361       }
1362     }
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1369 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1370 {
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1375   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1376   PetscFunctionReturn(0);
1377 }
1378 
1379 #undef __FUNCT__
1380 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1381 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1382 {
1383   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1384   Mat            C;
1385   PetscErrorCode ierr;
1386   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1387   PetscInt       *rows,*cols,bs2=a->bs2;
1388   MatScalar      *array;
1389 
1390   PetscFunctionBegin;
1391   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1392   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1393   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1394 
1395   array = a->a;
1396   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1397   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1398   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1399   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1400   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1401   ierr = PetscFree(col);CHKERRQ(ierr);
1402   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1403   cols = rows + bs;
1404   for (i=0; i<mbs; i++) {
1405     cols[0] = i*bs;
1406     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1407     len = ai[i+1] - ai[i];
1408     for (j=0; j<len; j++) {
1409       rows[0] = (*aj++)*bs;
1410       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1411       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1412       array += bs2;
1413     }
1414   }
1415   ierr = PetscFree(rows);CHKERRQ(ierr);
1416 
1417   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1418   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1419 
1420   if (B) {
1421     *B = C;
1422   } else {
1423     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1424   }
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1430 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1431 {
1432   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1433   PetscErrorCode ierr;
1434   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1435   int            fd;
1436   PetscScalar    *aa;
1437   FILE           *file;
1438 
1439   PetscFunctionBegin;
1440   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1441   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1442   col_lens[0] = MAT_FILE_COOKIE;
1443 
1444   col_lens[1] = A->rmap.N;
1445   col_lens[2] = A->cmap.n;
1446   col_lens[3] = a->nz*bs2;
1447 
1448   /* store lengths of each row and write (including header) to file */
1449   count = 0;
1450   for (i=0; i<a->mbs; i++) {
1451     for (j=0; j<bs; j++) {
1452       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1453     }
1454   }
1455   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1456   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1457 
1458   /* store column indices (zero start index) */
1459   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1460   count = 0;
1461   for (i=0; i<a->mbs; i++) {
1462     for (j=0; j<bs; j++) {
1463       for (k=a->i[i]; k<a->i[i+1]; k++) {
1464         for (l=0; l<bs; l++) {
1465           jj[count++] = bs*a->j[k] + l;
1466         }
1467       }
1468     }
1469   }
1470   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1471   ierr = PetscFree(jj);CHKERRQ(ierr);
1472 
1473   /* store nonzero values */
1474   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1475   count = 0;
1476   for (i=0; i<a->mbs; i++) {
1477     for (j=0; j<bs; j++) {
1478       for (k=a->i[i]; k<a->i[i+1]; k++) {
1479         for (l=0; l<bs; l++) {
1480           aa[count++] = a->a[bs2*k + l*bs + j];
1481         }
1482       }
1483     }
1484   }
1485   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1486   ierr = PetscFree(aa);CHKERRQ(ierr);
1487 
1488   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1489   if (file) {
1490     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1491   }
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 #undef __FUNCT__
1496 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1497 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1498 {
1499   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1500   PetscErrorCode    ierr;
1501   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1502   PetscViewerFormat format;
1503 
1504   PetscFunctionBegin;
1505   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1506   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1507     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1508   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1509     Mat aij;
1510     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1511     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1512     ierr = MatDestroy(aij);CHKERRQ(ierr);
1513   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1514      PetscFunctionReturn(0);
1515   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1516     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1517     for (i=0; i<a->mbs; i++) {
1518       for (j=0; j<bs; j++) {
1519         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1520         for (k=a->i[i]; k<a->i[i+1]; k++) {
1521           for (l=0; l<bs; l++) {
1522 #if defined(PETSC_USE_COMPLEX)
1523             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1525                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1526             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1527               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1528                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1529             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1531             }
1532 #else
1533             if (a->a[bs2*k + l*bs + j] != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1535             }
1536 #endif
1537           }
1538         }
1539         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1540       }
1541     }
1542     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1543   } else {
1544     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1545     for (i=0; i<a->mbs; i++) {
1546       for (j=0; j<bs; j++) {
1547         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1548         for (k=a->i[i]; k<a->i[i+1]; k++) {
1549           for (l=0; l<bs; l++) {
1550 #if defined(PETSC_USE_COMPLEX)
1551             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1553                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1554             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1555               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1556                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1557             } else {
1558               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1559             }
1560 #else
1561             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1562 #endif
1563           }
1564         }
1565         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1566       }
1567     }
1568     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1569   }
1570   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1571   PetscFunctionReturn(0);
1572 }
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1576 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1577 {
1578   Mat            A = (Mat) Aa;
1579   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1580   PetscErrorCode ierr;
1581   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1582   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1583   MatScalar      *aa;
1584   PetscViewer    viewer;
1585 
1586   PetscFunctionBegin;
1587 
1588   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1589   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1590 
1591   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1592 
1593   /* loop over matrix elements drawing boxes */
1594   color = PETSC_DRAW_BLUE;
1595   for (i=0,row=0; i<mbs; i++,row+=bs) {
1596     for (j=a->i[i]; j<a->i[i+1]; j++) {
1597       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1598       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1599       aa = a->a + j*bs2;
1600       for (k=0; k<bs; k++) {
1601         for (l=0; l<bs; l++) {
1602           if (PetscRealPart(*aa++) >=  0.) continue;
1603           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1604         }
1605       }
1606     }
1607   }
1608   color = PETSC_DRAW_CYAN;
1609   for (i=0,row=0; i<mbs; i++,row+=bs) {
1610     for (j=a->i[i]; j<a->i[i+1]; j++) {
1611       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1612       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1613       aa = a->a + j*bs2;
1614       for (k=0; k<bs; k++) {
1615         for (l=0; l<bs; l++) {
1616           if (PetscRealPart(*aa++) != 0.) continue;
1617           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1618         }
1619       }
1620     }
1621   }
1622 
1623   color = PETSC_DRAW_RED;
1624   for (i=0,row=0; i<mbs; i++,row+=bs) {
1625     for (j=a->i[i]; j<a->i[i+1]; j++) {
1626       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1627       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1628       aa = a->a + j*bs2;
1629       for (k=0; k<bs; k++) {
1630         for (l=0; l<bs; l++) {
1631           if (PetscRealPart(*aa++) <= 0.) continue;
1632           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1633         }
1634       }
1635     }
1636   }
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1642 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1643 {
1644   PetscErrorCode ierr;
1645   PetscReal      xl,yl,xr,yr,w,h;
1646   PetscDraw      draw;
1647   PetscTruth     isnull;
1648 
1649   PetscFunctionBegin;
1650 
1651   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1652   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1653 
1654   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1655   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1656   xr += w;    yr += h;  xl = -w;     yl = -h;
1657   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1658   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1659   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatView_SeqBAIJ"
1665 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1666 {
1667   PetscErrorCode ierr;
1668   PetscTruth     iascii,isbinary,isdraw;
1669 
1670   PetscFunctionBegin;
1671   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1672   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1673   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1674   if (iascii){
1675     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1676   } else if (isbinary) {
1677     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1678   } else if (isdraw) {
1679     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1680   } else {
1681     Mat B;
1682     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1683     ierr = MatView(B,viewer);CHKERRQ(ierr);
1684     ierr = MatDestroy(B);CHKERRQ(ierr);
1685   }
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 
1690 #undef __FUNCT__
1691 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1692 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1693 {
1694   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1695   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1696   PetscInt    *ai = a->i,*ailen = a->ilen;
1697   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1698   MatScalar   *ap,*aa = a->a;
1699 
1700   PetscFunctionBegin;
1701   for (k=0; k<m; k++) { /* loop over rows */
1702     row  = im[k]; brow = row/bs;
1703     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1704     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1705     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1706     nrow = ailen[brow];
1707     for (l=0; l<n; l++) { /* loop over columns */
1708       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1709       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1710       col  = in[l] ;
1711       bcol = col/bs;
1712       cidx = col%bs;
1713       ridx = row%bs;
1714       high = nrow;
1715       low  = 0; /* assume unsorted */
1716       while (high-low > 5) {
1717         t = (low+high)/2;
1718         if (rp[t] > bcol) high = t;
1719         else             low  = t;
1720       }
1721       for (i=low; i<high; i++) {
1722         if (rp[i] > bcol) break;
1723         if (rp[i] == bcol) {
1724           *v++ = ap[bs2*i+bs*cidx+ridx];
1725           goto finished;
1726         }
1727       }
1728       *v++ = 0.0;
1729       finished:;
1730     }
1731   }
1732   PetscFunctionReturn(0);
1733 }
1734 
1735 #define CHUNKSIZE 10
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1738 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1739 {
1740   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1741   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1742   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1743   PetscErrorCode    ierr;
1744   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1745   PetscTruth        roworiented=a->roworiented;
1746   const PetscScalar *value = v;
1747   MatScalar         *ap,*aa = a->a,*bap;
1748 
1749   PetscFunctionBegin;
1750   if (roworiented) {
1751     stepval = (n-1)*bs;
1752   } else {
1753     stepval = (m-1)*bs;
1754   }
1755   for (k=0; k<m; k++) { /* loop over added rows */
1756     row  = im[k];
1757     if (row < 0) continue;
1758 #if defined(PETSC_USE_DEBUG)
1759     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1760 #endif
1761     rp   = aj + ai[row];
1762     ap   = aa + bs2*ai[row];
1763     rmax = imax[row];
1764     nrow = ailen[row];
1765     low  = 0;
1766     high = nrow;
1767     for (l=0; l<n; l++) { /* loop over added columns */
1768       if (in[l] < 0) continue;
1769 #if defined(PETSC_USE_DEBUG)
1770       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1771 #endif
1772       col = in[l];
1773       if (roworiented) {
1774         value = v + k*(stepval+bs)*bs + l*bs;
1775       } else {
1776         value = v + l*(stepval+bs)*bs + k*bs;
1777       }
1778       if (col <= lastcol) low = 0; else high = nrow;
1779       lastcol = col;
1780       while (high-low > 7) {
1781         t = (low+high)/2;
1782         if (rp[t] > col) high = t;
1783         else             low  = t;
1784       }
1785       for (i=low; i<high; i++) {
1786         if (rp[i] > col) break;
1787         if (rp[i] == col) {
1788           bap  = ap +  bs2*i;
1789           if (roworiented) {
1790             if (is == ADD_VALUES) {
1791               for (ii=0; ii<bs; ii++,value+=stepval) {
1792                 for (jj=ii; jj<bs2; jj+=bs) {
1793                   bap[jj] += *value++;
1794                 }
1795               }
1796             } else {
1797               for (ii=0; ii<bs; ii++,value+=stepval) {
1798                 for (jj=ii; jj<bs2; jj+=bs) {
1799                   bap[jj] = *value++;
1800                 }
1801               }
1802             }
1803           } else {
1804             if (is == ADD_VALUES) {
1805               for (ii=0; ii<bs; ii++,value+=stepval) {
1806                 for (jj=0; jj<bs; jj++) {
1807                   *bap++ += *value++;
1808                 }
1809               }
1810             } else {
1811               for (ii=0; ii<bs; ii++,value+=stepval) {
1812                 for (jj=0; jj<bs; jj++) {
1813                   *bap++  = *value++;
1814                 }
1815               }
1816             }
1817           }
1818           goto noinsert2;
1819         }
1820       }
1821       if (nonew == 1) goto noinsert2;
1822       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1823       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1824       N = nrow++ - 1; high++;
1825       /* shift up all the later entries in this row */
1826       for (ii=N; ii>=i; ii--) {
1827         rp[ii+1] = rp[ii];
1828         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1829       }
1830       if (N >= i) {
1831         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1832       }
1833       rp[i] = col;
1834       bap   = ap +  bs2*i;
1835       if (roworiented) {
1836         for (ii=0; ii<bs; ii++,value+=stepval) {
1837           for (jj=ii; jj<bs2; jj+=bs) {
1838             bap[jj] = *value++;
1839           }
1840         }
1841       } else {
1842         for (ii=0; ii<bs; ii++,value+=stepval) {
1843           for (jj=0; jj<bs; jj++) {
1844             *bap++  = *value++;
1845           }
1846         }
1847       }
1848       noinsert2:;
1849       low = i;
1850     }
1851     ailen[row] = nrow;
1852   }
1853   PetscFunctionReturn(0);
1854 }
1855 
1856 #undef __FUNCT__
1857 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1858 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1859 {
1860   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1861   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1862   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1863   PetscErrorCode ierr;
1864   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1865   MatScalar      *aa = a->a,*ap;
1866   PetscReal      ratio=0.6;
1867 
1868   PetscFunctionBegin;
1869   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1870 
1871   if (m) rmax = ailen[0];
1872   for (i=1; i<mbs; i++) {
1873     /* move each row back by the amount of empty slots (fshift) before it*/
1874     fshift += imax[i-1] - ailen[i-1];
1875     rmax   = PetscMax(rmax,ailen[i]);
1876     if (fshift) {
1877       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1878       N = ailen[i];
1879       for (j=0; j<N; j++) {
1880         ip[j-fshift] = ip[j];
1881         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1882       }
1883     }
1884     ai[i] = ai[i-1] + ailen[i-1];
1885   }
1886   if (mbs) {
1887     fshift += imax[mbs-1] - ailen[mbs-1];
1888     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1889   }
1890   /* reset ilen and imax for each row */
1891   for (i=0; i<mbs; i++) {
1892     ailen[i] = imax[i] = ai[i+1] - ai[i];
1893   }
1894   a->nz = ai[mbs];
1895 
1896   /* diagonals may have moved, so kill the diagonal pointers */
1897   a->idiagvalid = PETSC_FALSE;
1898   if (fshift && a->diag) {
1899     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1900     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1901     a->diag = 0;
1902   }
1903   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1904   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1905   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1906   a->reallocs          = 0;
1907   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1908 
1909   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1910   if (a->compressedrow.use){
1911     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1912   }
1913 
1914   A->same_nonzero = PETSC_TRUE;
1915   PetscFunctionReturn(0);
1916 }
1917 
1918 /*
1919    This function returns an array of flags which indicate the locations of contiguous
1920    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1921    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1922    Assume: sizes should be long enough to hold all the values.
1923 */
1924 #undef __FUNCT__
1925 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1926 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1927 {
1928   PetscInt   i,j,k,row;
1929   PetscTruth flg;
1930 
1931   PetscFunctionBegin;
1932   for (i=0,j=0; i<n; j++) {
1933     row = idx[i];
1934     if (row%bs!=0) { /* Not the begining of a block */
1935       sizes[j] = 1;
1936       i++;
1937     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1938       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1939       i++;
1940     } else { /* Begining of the block, so check if the complete block exists */
1941       flg = PETSC_TRUE;
1942       for (k=1; k<bs; k++) {
1943         if (row+k != idx[i+k]) { /* break in the block */
1944           flg = PETSC_FALSE;
1945           break;
1946         }
1947       }
1948       if (flg) { /* No break in the bs */
1949         sizes[j] = bs;
1950         i+= bs;
1951       } else {
1952         sizes[j] = 1;
1953         i++;
1954       }
1955     }
1956   }
1957   *bs_max = j;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1963 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1964 {
1965   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1966   PetscErrorCode ierr;
1967   PetscInt       i,j,k,count,*rows;
1968   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1969   PetscScalar    zero = 0.0;
1970   MatScalar      *aa;
1971 
1972   PetscFunctionBegin;
1973   /* Make a copy of the IS and  sort it */
1974   /* allocate memory for rows,sizes */
1975   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1976   sizes = rows + is_n;
1977 
1978   /* copy IS values to rows, and sort them */
1979   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1980   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1981   if (baij->keepzeroedrows) {
1982     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1983     bs_max = is_n;
1984     A->same_nonzero = PETSC_TRUE;
1985   } else {
1986     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1987     A->same_nonzero = PETSC_FALSE;
1988   }
1989 
1990   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1991     row   = rows[j];
1992     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1993     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1994     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1995     if (sizes[i] == bs && !baij->keepzeroedrows) {
1996       if (diag != 0.0) {
1997         if (baij->ilen[row/bs] > 0) {
1998           baij->ilen[row/bs]       = 1;
1999           baij->j[baij->i[row/bs]] = row/bs;
2000           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2001         }
2002         /* Now insert all the diagonal values for this bs */
2003         for (k=0; k<bs; k++) {
2004           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2005         }
2006       } else { /* (diag == 0.0) */
2007         baij->ilen[row/bs] = 0;
2008       } /* end (diag == 0.0) */
2009     } else { /* (sizes[i] != bs) */
2010 #if defined (PETSC_USE_DEBUG)
2011       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2012 #endif
2013       for (k=0; k<count; k++) {
2014         aa[0] =  zero;
2015         aa    += bs;
2016       }
2017       if (diag != 0.0) {
2018         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2019       }
2020     }
2021   }
2022 
2023   ierr = PetscFree(rows);CHKERRQ(ierr);
2024   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2025   PetscFunctionReturn(0);
2026 }
2027 
2028 #undef __FUNCT__
2029 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2030 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2031 {
2032   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2033   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2034   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2035   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2036   PetscErrorCode ierr;
2037   PetscInt       ridx,cidx,bs2=a->bs2;
2038   PetscTruth     roworiented=a->roworiented;
2039   MatScalar      *ap,value,*aa=a->a,*bap;
2040 
2041   PetscFunctionBegin;
2042   for (k=0; k<m; k++) { /* loop over added rows */
2043     row  = im[k];
2044     brow = row/bs;
2045     if (row < 0) continue;
2046 #if defined(PETSC_USE_DEBUG)
2047     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2048 #endif
2049     rp   = aj + ai[brow];
2050     ap   = aa + bs2*ai[brow];
2051     rmax = imax[brow];
2052     nrow = ailen[brow];
2053     low  = 0;
2054     high = nrow;
2055     for (l=0; l<n; l++) { /* loop over added columns */
2056       if (in[l] < 0) continue;
2057 #if defined(PETSC_USE_DEBUG)
2058       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2059 #endif
2060       col = in[l]; bcol = col/bs;
2061       ridx = row % bs; cidx = col % bs;
2062       if (roworiented) {
2063         value = v[l + k*n];
2064       } else {
2065         value = v[k + l*m];
2066       }
2067       if (col <= lastcol) low = 0; else high = nrow;
2068       lastcol = col;
2069       while (high-low > 7) {
2070         t = (low+high)/2;
2071         if (rp[t] > bcol) high = t;
2072         else              low  = t;
2073       }
2074       for (i=low; i<high; i++) {
2075         if (rp[i] > bcol) break;
2076         if (rp[i] == bcol) {
2077           bap  = ap +  bs2*i + bs*cidx + ridx;
2078           if (is == ADD_VALUES) *bap += value;
2079           else                  *bap  = value;
2080           goto noinsert1;
2081         }
2082       }
2083       if (nonew == 1) goto noinsert1;
2084       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2085       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2086       N = nrow++ - 1; high++;
2087       /* shift up all the later entries in this row */
2088       for (ii=N; ii>=i; ii--) {
2089         rp[ii+1] = rp[ii];
2090         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2091       }
2092       if (N>=i) {
2093         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2094       }
2095       rp[i]                      = bcol;
2096       ap[bs2*i + bs*cidx + ridx] = value;
2097       a->nz++;
2098       noinsert1:;
2099       low = i;
2100     }
2101     ailen[brow] = nrow;
2102   }
2103   A->same_nonzero = PETSC_FALSE;
2104   PetscFunctionReturn(0);
2105 }
2106 
2107 
2108 #undef __FUNCT__
2109 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2110 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2111 {
2112   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2113   Mat            outA;
2114   PetscErrorCode ierr;
2115   PetscTruth     row_identity,col_identity;
2116 
2117   PetscFunctionBegin;
2118   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2119   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2120   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2121   if (!row_identity || !col_identity) {
2122     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2123   }
2124 
2125   outA          = inA;
2126   inA->factor   = FACTOR_LU;
2127 
2128   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2129 
2130   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2131   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2132   a->row = row;
2133   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2134   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2135   a->col = col;
2136 
2137   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2138   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2140 
2141   /*
2142       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2143       for ILU(0) factorization with natural ordering
2144   */
2145   if (inA->rmap.bs < 8) {
2146     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2147   } else {
2148     if (!a->solve_work) {
2149       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2150       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2151     }
2152   }
2153 
2154   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2155 
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 EXTERN_C_BEGIN
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2162 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2163 {
2164   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2165   PetscInt    i,nz,nbs;
2166 
2167   PetscFunctionBegin;
2168   nz  = baij->maxnz/baij->bs2;
2169   nbs = baij->nbs;
2170   for (i=0; i<nz; i++) {
2171     baij->j[i] = indices[i];
2172   }
2173   baij->nz = nz;
2174   for (i=0; i<nbs; i++) {
2175     baij->ilen[i] = baij->imax[i];
2176   }
2177 
2178   PetscFunctionReturn(0);
2179 }
2180 EXTERN_C_END
2181 
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2184 /*@
2185     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2186        in the matrix.
2187 
2188   Input Parameters:
2189 +  mat - the SeqBAIJ matrix
2190 -  indices - the column indices
2191 
2192   Level: advanced
2193 
2194   Notes:
2195     This can be called if you have precomputed the nonzero structure of the
2196   matrix and want to provide it to the matrix object to improve the performance
2197   of the MatSetValues() operation.
2198 
2199     You MUST have set the correct numbers of nonzeros per row in the call to
2200   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2201 
2202     MUST be called before any calls to MatSetValues();
2203 
2204 @*/
2205 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2206 {
2207   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2208 
2209   PetscFunctionBegin;
2210   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2211   PetscValidPointer(indices,2);
2212   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2213   if (f) {
2214     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2215   } else {
2216     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2217   }
2218   PetscFunctionReturn(0);
2219 }
2220 
2221 #undef __FUNCT__
2222 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2223 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2224 {
2225   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2226   PetscErrorCode ierr;
2227   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2228   PetscReal      atmp;
2229   PetscScalar    *x,zero = 0.0;
2230   MatScalar      *aa;
2231   PetscInt       ncols,brow,krow,kcol;
2232 
2233   PetscFunctionBegin;
2234   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2235   bs   = A->rmap.bs;
2236   aa   = a->a;
2237   ai   = a->i;
2238   aj   = a->j;
2239   mbs  = a->mbs;
2240 
2241   ierr = VecSet(v,zero);CHKERRQ(ierr);
2242   if (idx) {
2243     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
2244   }
2245   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2246   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2247   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2248   for (i=0; i<mbs; i++) {
2249     ncols = ai[1] - ai[0]; ai++;
2250     brow  = bs*i;
2251     for (j=0; j<ncols; j++){
2252       for (kcol=0; kcol<bs; kcol++){
2253         for (krow=0; krow<bs; krow++){
2254           atmp = PetscAbsScalar(*aa);aa++;
2255           row = brow + krow;    /* row index */
2256           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2257           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2258         }
2259       }
2260       aj++;
2261     }
2262   }
2263   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatCopy_SeqBAIJ"
2269 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2270 {
2271   PetscErrorCode ierr;
2272 
2273   PetscFunctionBegin;
2274   /* If the two matrices have the same copy implementation, use fast copy. */
2275   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2276     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2277     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2278 
2279     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2280       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2281     }
2282     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2283   } else {
2284     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2285   }
2286   PetscFunctionReturn(0);
2287 }
2288 
2289 #undef __FUNCT__
2290 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2291 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2292 {
2293   PetscErrorCode ierr;
2294 
2295   PetscFunctionBegin;
2296   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2297   PetscFunctionReturn(0);
2298 }
2299 
2300 #undef __FUNCT__
2301 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2302 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2303 {
2304   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2305   PetscFunctionBegin;
2306   *array = a->a;
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2312 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2313 {
2314   PetscFunctionBegin;
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 #include "petscblaslapack.h"
2319 #undef __FUNCT__
2320 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2321 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2322 {
2323   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2324   PetscErrorCode ierr;
2325   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2326   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2327 
2328   PetscFunctionBegin;
2329   if (str == SAME_NONZERO_PATTERN) {
2330     PetscScalar alpha = a;
2331     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2332   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2333     if (y->xtoy && y->XtoY != X) {
2334       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2335       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2336     }
2337     if (!y->xtoy) { /* get xtoy */
2338       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2339       y->XtoY = X;
2340     }
2341     bs2 = bs*bs;
2342     for (i=0; i<x->nz; i++) {
2343       j = 0;
2344       while (j < bs2){
2345         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2346         j++;
2347       }
2348     }
2349     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2350   } else {
2351     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2352   }
2353   PetscFunctionReturn(0);
2354 }
2355 
2356 #undef __FUNCT__
2357 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2358 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2359 {
2360   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2361   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2362   MatScalar      *aa = a->a;
2363 
2364   PetscFunctionBegin;
2365   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2371 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2372 {
2373   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2374   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2375   MatScalar      *aa = a->a;
2376 
2377   PetscFunctionBegin;
2378   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2379   PetscFunctionReturn(0);
2380 }
2381 
2382 
2383 /* -------------------------------------------------------------------*/
2384 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2385        MatGetRow_SeqBAIJ,
2386        MatRestoreRow_SeqBAIJ,
2387        MatMult_SeqBAIJ_N,
2388 /* 4*/ MatMultAdd_SeqBAIJ_N,
2389        MatMultTranspose_SeqBAIJ,
2390        MatMultTransposeAdd_SeqBAIJ,
2391        MatSolve_SeqBAIJ_N,
2392        0,
2393        0,
2394 /*10*/ 0,
2395        MatLUFactor_SeqBAIJ,
2396        0,
2397        0,
2398        MatTranspose_SeqBAIJ,
2399 /*15*/ MatGetInfo_SeqBAIJ,
2400        MatEqual_SeqBAIJ,
2401        MatGetDiagonal_SeqBAIJ,
2402        MatDiagonalScale_SeqBAIJ,
2403        MatNorm_SeqBAIJ,
2404 /*20*/ 0,
2405        MatAssemblyEnd_SeqBAIJ,
2406        0,
2407        MatSetOption_SeqBAIJ,
2408        MatZeroEntries_SeqBAIJ,
2409 /*25*/ MatZeroRows_SeqBAIJ,
2410        MatLUFactorSymbolic_SeqBAIJ,
2411        MatLUFactorNumeric_SeqBAIJ_N,
2412        MatCholeskyFactorSymbolic_SeqBAIJ,
2413        MatCholeskyFactorNumeric_SeqBAIJ_N,
2414 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2415        MatILUFactorSymbolic_SeqBAIJ,
2416        MatICCFactorSymbolic_SeqBAIJ,
2417        MatGetArray_SeqBAIJ,
2418        MatRestoreArray_SeqBAIJ,
2419 /*35*/ MatDuplicate_SeqBAIJ,
2420        0,
2421        0,
2422        MatILUFactor_SeqBAIJ,
2423        0,
2424 /*40*/ MatAXPY_SeqBAIJ,
2425        MatGetSubMatrices_SeqBAIJ,
2426        MatIncreaseOverlap_SeqBAIJ,
2427        MatGetValues_SeqBAIJ,
2428        MatCopy_SeqBAIJ,
2429 /*45*/ 0,
2430        MatScale_SeqBAIJ,
2431        0,
2432        0,
2433        0,
2434 /*50*/ 0,
2435        MatGetRowIJ_SeqBAIJ,
2436        MatRestoreRowIJ_SeqBAIJ,
2437        0,
2438        0,
2439 /*55*/ 0,
2440        0,
2441        0,
2442        0,
2443        MatSetValuesBlocked_SeqBAIJ,
2444 /*60*/ MatGetSubMatrix_SeqBAIJ,
2445        MatDestroy_SeqBAIJ,
2446        MatView_SeqBAIJ,
2447        0,
2448        0,
2449 /*65*/ 0,
2450        0,
2451        0,
2452        0,
2453        0,
2454 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2455        MatConvert_Basic,
2456        0,
2457        0,
2458        0,
2459 /*75*/ 0,
2460        0,
2461        0,
2462        0,
2463        0,
2464 /*80*/ 0,
2465        0,
2466        0,
2467        0,
2468        MatLoad_SeqBAIJ,
2469 /*85*/ 0,
2470        0,
2471        0,
2472        0,
2473        0,
2474 /*90*/ 0,
2475        0,
2476        0,
2477        0,
2478        0,
2479 /*95*/ 0,
2480        0,
2481        0,
2482        0,
2483        0,
2484 /*100*/0,
2485        0,
2486        0,
2487        0,
2488        0,
2489 /*105*/0,
2490        MatRealPart_SeqBAIJ,
2491        MatImaginaryPart_SeqBAIJ,
2492        0,
2493        0,
2494 /*110*/0,
2495        0,
2496        0,
2497        0,
2498        MatMissingDiagonal_SeqBAIJ
2499 /*115*/
2500 };
2501 
2502 EXTERN_C_BEGIN
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2505 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2506 {
2507   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2508   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2509   PetscErrorCode ierr;
2510 
2511   PetscFunctionBegin;
2512   if (aij->nonew != 1) {
2513     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2514   }
2515 
2516   /* allocate space for values if not already there */
2517   if (!aij->saved_values) {
2518     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2519   }
2520 
2521   /* copy values over */
2522   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2523   PetscFunctionReturn(0);
2524 }
2525 EXTERN_C_END
2526 
2527 EXTERN_C_BEGIN
2528 #undef __FUNCT__
2529 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2530 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2531 {
2532   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2533   PetscErrorCode ierr;
2534   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2535 
2536   PetscFunctionBegin;
2537   if (aij->nonew != 1) {
2538     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2539   }
2540   if (!aij->saved_values) {
2541     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2542   }
2543 
2544   /* copy values over */
2545   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2546   PetscFunctionReturn(0);
2547 }
2548 EXTERN_C_END
2549 
2550 EXTERN_C_BEGIN
2551 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2552 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2553 EXTERN_C_END
2554 
2555 EXTERN_C_BEGIN
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2558 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2559 {
2560   Mat_SeqBAIJ    *b;
2561   PetscErrorCode ierr;
2562   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2563   PetscTruth     flg,skipallocation = PETSC_FALSE;
2564 
2565   PetscFunctionBegin;
2566 
2567   if (nz == MAT_SKIP_ALLOCATION) {
2568     skipallocation = PETSC_TRUE;
2569     nz             = 0;
2570   }
2571 
2572   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2573     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2574   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2575 
2576   if (nnz && newbs != bs) {
2577     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2578   }
2579   bs   = newbs;
2580 
2581   B->rmap.bs = B->cmap.bs = bs;
2582   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2583   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2584 
2585   B->preallocated = PETSC_TRUE;
2586 
2587   mbs  = B->rmap.n/bs;
2588   nbs  = B->cmap.n/bs;
2589   bs2  = bs*bs;
2590 
2591   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2592     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2593   }
2594 
2595   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2596   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2597   if (nnz) {
2598     for (i=0; i<mbs; i++) {
2599       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2600       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2601     }
2602   }
2603 
2604   b       = (Mat_SeqBAIJ*)B->data;
2605   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2606     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2607   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2608 
2609   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2610   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2611   if (!flg) {
2612     switch (bs) {
2613     case 1:
2614       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2615       B->ops->mult            = MatMult_SeqBAIJ_1;
2616       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2617       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2618       break;
2619     case 2:
2620       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2621       B->ops->mult            = MatMult_SeqBAIJ_2;
2622       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2623       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2624       break;
2625     case 3:
2626       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2627       B->ops->mult            = MatMult_SeqBAIJ_3;
2628       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2629       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2630       break;
2631     case 4:
2632       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2633       B->ops->mult            = MatMult_SeqBAIJ_4;
2634       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2635       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2636       break;
2637     case 5:
2638       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2639       B->ops->mult            = MatMult_SeqBAIJ_5;
2640       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2641       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2642       break;
2643     case 6:
2644       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2645       B->ops->mult            = MatMult_SeqBAIJ_6;
2646       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2647       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2648       break;
2649     case 7:
2650       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2651       B->ops->mult            = MatMult_SeqBAIJ_7;
2652       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2653       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2654       break;
2655     default:
2656       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2657       B->ops->mult            = MatMult_SeqBAIJ_N;
2658       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2659       break;
2660     }
2661   }
2662   B->rmap.bs      = bs;
2663   b->mbs     = mbs;
2664   b->nbs     = nbs;
2665   if (!skipallocation) {
2666     if (!b->imax) {
2667       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2668     }
2669     /* b->ilen will count nonzeros in each block row so far. */
2670     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2671     if (!nnz) {
2672       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2673       else if (nz <= 0)        nz = 1;
2674       for (i=0; i<mbs; i++) b->imax[i] = nz;
2675       nz = nz*mbs;
2676     } else {
2677       nz = 0;
2678       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2679     }
2680 
2681     /* allocate the matrix space */
2682     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2683     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2684     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2685     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2686     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2687     b->singlemalloc = PETSC_TRUE;
2688     b->i[0] = 0;
2689     for (i=1; i<mbs+1; i++) {
2690       b->i[i] = b->i[i-1] + b->imax[i-1];
2691     }
2692     b->free_a     = PETSC_TRUE;
2693     b->free_ij    = PETSC_TRUE;
2694   } else {
2695     b->free_a     = PETSC_FALSE;
2696     b->free_ij    = PETSC_FALSE;
2697   }
2698 
2699   B->rmap.bs          = bs;
2700   b->bs2              = bs2;
2701   b->mbs              = mbs;
2702   b->nz               = 0;
2703   b->maxnz            = nz*bs2;
2704   B->info.nz_unneeded = (PetscReal)b->maxnz;
2705   PetscFunctionReturn(0);
2706 }
2707 EXTERN_C_END
2708 
2709 /*MC
2710    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2711    block sparse compressed row format.
2712 
2713    Options Database Keys:
2714 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2715 
2716   Level: beginner
2717 
2718 .seealso: MatCreateSeqBAIJ()
2719 M*/
2720 
2721 EXTERN_C_BEGIN
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatCreate_SeqBAIJ"
2724 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2725 {
2726   PetscErrorCode ierr;
2727   PetscMPIInt    size;
2728   Mat_SeqBAIJ    *b;
2729 
2730   PetscFunctionBegin;
2731   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2732   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2733 
2734   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2735   B->data = (void*)b;
2736   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2737   B->factor           = 0;
2738   B->mapping          = 0;
2739   b->row              = 0;
2740   b->col              = 0;
2741   b->icol             = 0;
2742   b->reallocs         = 0;
2743   b->saved_values     = 0;
2744 
2745   b->roworiented      = PETSC_TRUE;
2746   b->nonew            = 0;
2747   b->diag             = 0;
2748   b->solve_work       = 0;
2749   b->mult_work        = 0;
2750   B->spptr            = 0;
2751   B->info.nz_unneeded = (PetscReal)b->maxnz;
2752   b->keepzeroedrows   = PETSC_FALSE;
2753   b->xtoy              = 0;
2754   b->XtoY              = 0;
2755   b->compressedrow.use     = PETSC_FALSE;
2756   b->compressedrow.nrows   = 0;
2757   b->compressedrow.i       = PETSC_NULL;
2758   b->compressedrow.rindex  = PETSC_NULL;
2759   b->compressedrow.checked = PETSC_FALSE;
2760   B->same_nonzero          = PETSC_FALSE;
2761 
2762   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2763                                      "MatInvertBlockDiagonal_SeqBAIJ",
2764                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2766                                      "MatStoreValues_SeqBAIJ",
2767                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2769                                      "MatRetrieveValues_SeqBAIJ",
2770                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2772                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2773                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2775                                      "MatConvert_SeqBAIJ_SeqAIJ",
2776                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2778                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2779                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2781                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2782                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2783   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2784   PetscFunctionReturn(0);
2785 }
2786 EXTERN_C_END
2787 
2788 #undef __FUNCT__
2789 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2790 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2791 {
2792   Mat            C;
2793   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2794   PetscErrorCode ierr;
2795   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2796 
2797   PetscFunctionBegin;
2798   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2799 
2800   *B = 0;
2801   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2802   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2803   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2804   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2805   c    = (Mat_SeqBAIJ*)C->data;
2806 
2807   C->rmap.N   = A->rmap.N;
2808   C->cmap.N   = A->cmap.N;
2809   C->rmap.bs  = A->rmap.bs;
2810   c->bs2 = a->bs2;
2811   c->mbs = a->mbs;
2812   c->nbs = a->nbs;
2813 
2814   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2815   for (i=0; i<mbs; i++) {
2816     c->imax[i] = a->imax[i];
2817     c->ilen[i] = a->ilen[i];
2818   }
2819 
2820   /* allocate the matrix space */
2821   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2822   c->singlemalloc = PETSC_TRUE;
2823   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2824   if (mbs > 0) {
2825     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2826     if (cpvalues == MAT_COPY_VALUES) {
2827       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2828     } else {
2829       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2830     }
2831   }
2832   c->roworiented = a->roworiented;
2833   c->nonew       = a->nonew;
2834 
2835   if (a->diag) {
2836     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2837     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2838     for (i=0; i<mbs; i++) {
2839       c->diag[i] = a->diag[i];
2840     }
2841   } else c->diag        = 0;
2842   c->nz                 = a->nz;
2843   c->maxnz              = a->maxnz;
2844   c->solve_work         = 0;
2845   c->mult_work          = 0;
2846   c->free_a             = PETSC_TRUE;
2847   c->free_ij            = PETSC_TRUE;
2848   C->preallocated       = PETSC_TRUE;
2849   C->assembled          = PETSC_TRUE;
2850 
2851   c->compressedrow.use     = a->compressedrow.use;
2852   c->compressedrow.nrows   = a->compressedrow.nrows;
2853   c->compressedrow.checked = a->compressedrow.checked;
2854   if ( a->compressedrow.checked && a->compressedrow.use){
2855     i = a->compressedrow.nrows;
2856     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2857     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2858     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2859     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2860   } else {
2861     c->compressedrow.use    = PETSC_FALSE;
2862     c->compressedrow.i      = PETSC_NULL;
2863     c->compressedrow.rindex = PETSC_NULL;
2864   }
2865   C->same_nonzero = A->same_nonzero;
2866   *B = C;
2867   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 #undef __FUNCT__
2872 #define __FUNCT__ "MatLoad_SeqBAIJ"
2873 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2874 {
2875   Mat_SeqBAIJ    *a;
2876   Mat            B;
2877   PetscErrorCode ierr;
2878   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2879   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2880   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2881   PetscInt       *masked,nmask,tmp,bs2,ishift;
2882   PetscMPIInt    size;
2883   int            fd;
2884   PetscScalar    *aa;
2885   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2886 
2887   PetscFunctionBegin;
2888   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2889     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2890   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2891   bs2  = bs*bs;
2892 
2893   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2894   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2895   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2896   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2897   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2898   M = header[1]; N = header[2]; nz = header[3];
2899 
2900   if (header[3] < 0) {
2901     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2902   }
2903 
2904   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2905 
2906   /*
2907      This code adds extra rows to make sure the number of rows is
2908     divisible by the blocksize
2909   */
2910   mbs        = M/bs;
2911   extra_rows = bs - M + bs*(mbs);
2912   if (extra_rows == bs) extra_rows = 0;
2913   else                  mbs++;
2914   if (extra_rows) {
2915     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2916   }
2917 
2918   /* read in row lengths */
2919   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2920   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2921   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2922 
2923   /* read in column indices */
2924   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2925   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2926   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2927 
2928   /* loop over row lengths determining block row lengths */
2929   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2930   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2931   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2932   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2933   masked   = mask + mbs;
2934   rowcount = 0; nzcount = 0;
2935   for (i=0; i<mbs; i++) {
2936     nmask = 0;
2937     for (j=0; j<bs; j++) {
2938       kmax = rowlengths[rowcount];
2939       for (k=0; k<kmax; k++) {
2940         tmp = jj[nzcount++]/bs;
2941         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2942       }
2943       rowcount++;
2944     }
2945     browlengths[i] += nmask;
2946     /* zero out the mask elements we set */
2947     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2948   }
2949 
2950   /* create our matrix */
2951   ierr = MatCreate(comm,&B);
2952   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2953   ierr = MatSetType(B,type);CHKERRQ(ierr);
2954   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2955   a = (Mat_SeqBAIJ*)B->data;
2956 
2957   /* set matrix "i" values */
2958   a->i[0] = 0;
2959   for (i=1; i<= mbs; i++) {
2960     a->i[i]      = a->i[i-1] + browlengths[i-1];
2961     a->ilen[i-1] = browlengths[i-1];
2962   }
2963   a->nz         = 0;
2964   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2965 
2966   /* read in nonzero values */
2967   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2968   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2969   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2970 
2971   /* set "a" and "j" values into matrix */
2972   nzcount = 0; jcount = 0;
2973   for (i=0; i<mbs; i++) {
2974     nzcountb = nzcount;
2975     nmask    = 0;
2976     for (j=0; j<bs; j++) {
2977       kmax = rowlengths[i*bs+j];
2978       for (k=0; k<kmax; k++) {
2979         tmp = jj[nzcount++]/bs;
2980 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2981       }
2982     }
2983     /* sort the masked values */
2984     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2985 
2986     /* set "j" values into matrix */
2987     maskcount = 1;
2988     for (j=0; j<nmask; j++) {
2989       a->j[jcount++]  = masked[j];
2990       mask[masked[j]] = maskcount++;
2991     }
2992     /* set "a" values into matrix */
2993     ishift = bs2*a->i[i];
2994     for (j=0; j<bs; j++) {
2995       kmax = rowlengths[i*bs+j];
2996       for (k=0; k<kmax; k++) {
2997         tmp       = jj[nzcountb]/bs ;
2998         block     = mask[tmp] - 1;
2999         point     = jj[nzcountb] - bs*tmp;
3000         idx       = ishift + bs2*block + j + bs*point;
3001         a->a[idx] = (MatScalar)aa[nzcountb++];
3002       }
3003     }
3004     /* zero out the mask elements we set */
3005     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3006   }
3007   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3008 
3009   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3010   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3011   ierr = PetscFree(aa);CHKERRQ(ierr);
3012   ierr = PetscFree(jj);CHKERRQ(ierr);
3013   ierr = PetscFree(mask);CHKERRQ(ierr);
3014 
3015   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3016   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3017   ierr = MatView_Private(B);CHKERRQ(ierr);
3018 
3019   *A = B;
3020   PetscFunctionReturn(0);
3021 }
3022 
3023 #undef __FUNCT__
3024 #define __FUNCT__ "MatCreateSeqBAIJ"
3025 /*@C
3026    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3027    compressed row) format.  For good matrix assembly performance the
3028    user should preallocate the matrix storage by setting the parameter nz
3029    (or the array nnz).  By setting these parameters accurately, performance
3030    during matrix assembly can be increased by more than a factor of 50.
3031 
3032    Collective on MPI_Comm
3033 
3034    Input Parameters:
3035 +  comm - MPI communicator, set to PETSC_COMM_SELF
3036 .  bs - size of block
3037 .  m - number of rows
3038 .  n - number of columns
3039 .  nz - number of nonzero blocks  per block row (same for all rows)
3040 -  nnz - array containing the number of nonzero blocks in the various block rows
3041          (possibly different for each block row) or PETSC_NULL
3042 
3043    Output Parameter:
3044 .  A - the matrix
3045 
3046    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3047    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3048    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3049    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3050 
3051    Options Database Keys:
3052 .   -mat_no_unroll - uses code that does not unroll the loops in the
3053                      block calculations (much slower)
3054 .    -mat_block_size - size of the blocks to use
3055 
3056    Level: intermediate
3057 
3058    Notes:
3059    The number of rows and columns must be divisible by blocksize.
3060 
3061    If the nnz parameter is given then the nz parameter is ignored
3062 
3063    A nonzero block is any block that as 1 or more nonzeros in it
3064 
3065    The block AIJ format is fully compatible with standard Fortran 77
3066    storage.  That is, the stored row and column indices can begin at
3067    either one (as in Fortran) or zero.  See the users' manual for details.
3068 
3069    Specify the preallocated storage with either nz or nnz (not both).
3070    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3071    allocation.  For additional details, see the users manual chapter on
3072    matrices.
3073 
3074 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3075 @*/
3076 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3077 {
3078   PetscErrorCode ierr;
3079 
3080   PetscFunctionBegin;
3081   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3082   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3083   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3084   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3085   PetscFunctionReturn(0);
3086 }
3087 
3088 #undef __FUNCT__
3089 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3090 /*@C
3091    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3092    per row in the matrix. For good matrix assembly performance the
3093    user should preallocate the matrix storage by setting the parameter nz
3094    (or the array nnz).  By setting these parameters accurately, performance
3095    during matrix assembly can be increased by more than a factor of 50.
3096 
3097    Collective on MPI_Comm
3098 
3099    Input Parameters:
3100 +  A - the matrix
3101 .  bs - size of block
3102 .  nz - number of block nonzeros per block row (same for all rows)
3103 -  nnz - array containing the number of block nonzeros in the various block rows
3104          (possibly different for each block row) or PETSC_NULL
3105 
3106    Options Database Keys:
3107 .   -mat_no_unroll - uses code that does not unroll the loops in the
3108                      block calculations (much slower)
3109 .    -mat_block_size - size of the blocks to use
3110 
3111    Level: intermediate
3112 
3113    Notes:
3114    If the nnz parameter is given then the nz parameter is ignored
3115 
3116    You can call MatGetInfo() to get information on how effective the preallocation was;
3117    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3118    You can also run with the option -info and look for messages with the string
3119    malloc in them to see if additional memory allocation was needed.
3120 
3121    The block AIJ format is fully compatible with standard Fortran 77
3122    storage.  That is, the stored row and column indices can begin at
3123    either one (as in Fortran) or zero.  See the users' manual for details.
3124 
3125    Specify the preallocated storage with either nz or nnz (not both).
3126    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3127    allocation.  For additional details, see the users manual chapter on
3128    matrices.
3129 
3130 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3131 @*/
3132 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3133 {
3134   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3135 
3136   PetscFunctionBegin;
3137   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3138   if (f) {
3139     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3140   }
3141   PetscFunctionReturn(0);
3142 }
3143 
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3146 /*@
3147      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3148               (upper triangular entries in CSR format) provided by the user.
3149 
3150      Collective on MPI_Comm
3151 
3152    Input Parameters:
3153 +  comm - must be an MPI communicator of size 1
3154 .  bs - size of block
3155 .  m - number of rows
3156 .  n - number of columns
3157 .  i - row indices
3158 .  j - column indices
3159 -  a - matrix values
3160 
3161    Output Parameter:
3162 .  mat - the matrix
3163 
3164    Level: intermediate
3165 
3166    Notes:
3167        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3168     once the matrix is destroyed
3169 
3170        You cannot set new nonzero locations into this matrix, that will generate an error.
3171 
3172        The i and j indices are 0 based
3173 
3174 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3175 
3176 @*/
3177 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3178 {
3179   PetscErrorCode ierr;
3180   PetscInt       ii;
3181   Mat_SeqBAIJ    *baij;
3182 
3183   PetscFunctionBegin;
3184   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3185   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3186 
3187   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3188   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3189   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3190   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3191   baij = (Mat_SeqBAIJ*)(*mat)->data;
3192   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3193 
3194   baij->i = i;
3195   baij->j = j;
3196   baij->a = a;
3197   baij->singlemalloc = PETSC_FALSE;
3198   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3199   baij->free_a       = PETSC_FALSE;
3200   baij->free_ij       = PETSC_FALSE;
3201 
3202   for (ii=0; ii<m; ii++) {
3203     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3204 #if defined(PETSC_USE_DEBUG)
3205     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3206 #endif
3207   }
3208 #if defined(PETSC_USE_DEBUG)
3209   for (ii=0; ii<baij->i[m]; ii++) {
3210     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3211     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3212   }
3213 #endif
3214 
3215   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3216   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3217   PetscFunctionReturn(0);
3218 }
3219