xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d32f9abdbc052d6e1fd06679b17a55415c3aae30)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
52   PetscReal      shift = 0.0;
53 
54   PetscFunctionBegin;
55   if (a->idiagvalid) PetscFunctionReturn(0);
56   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
57   diag_offset = a->diag;
58   if (!a->idiag) {
59     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
60   }
61   diag  = a->idiag;
62   mdiag = a->idiag+bs*bs*mbs;
63   /* factor and invert each block */
64   switch (bs){
65     case 2:
66       for (i=0; i<mbs; i++) {
67         odiag   = v + 4*diag_offset[i];
68         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
70 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
71 	diag    += 4;
72 	mdiag   += 4;
73       }
74       break;
75     case 3:
76       for (i=0; i<mbs; i++) {
77         odiag    = v + 9*diag_offset[i];
78         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
79         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
80         diag[8]  = odiag[8];
81         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
82         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
83         mdiag[8] = odiag[8];
84 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
85 	diag    += 9;
86 	mdiag   += 9;
87       }
88       break;
89     case 4:
90       for (i=0; i<mbs; i++) {
91         odiag  = v + 16*diag_offset[i];
92         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
94 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
95 	diag  += 16;
96 	mdiag += 16;
97       }
98       break;
99     case 5:
100       for (i=0; i<mbs; i++) {
101         odiag = v + 25*diag_offset[i];
102         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
104 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
105 	diag  += 25;
106 	mdiag += 25;
107       }
108       break;
109     default:
110       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
111   }
112   a->idiagvalid = PETSC_TRUE;
113   PetscFunctionReturn(0);
114 }
115 EXTERN_C_END
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
119 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
120 {
121   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
122   PetscScalar        *x,x1,s1;
123   const PetscScalar  *b;
124   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
125   PetscErrorCode     ierr;
126   PetscInt           m = a->mbs,i,i2,nz,idx;
127   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
128 
129   PetscFunctionBegin;
130   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
131   its = its*lits;
132   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
133   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
134   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
135   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
136   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
137 
138   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
139 
140   diag  = a->diag;
141   idiag = a->idiag;
142   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
143   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
144 
145   if (flag & SOR_ZERO_INITIAL_GUESS) {
146     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
147       x[0] = b[0]*idiag[0];
148       i2     = 1;
149       idiag += 1;
150       for (i=1; i<m; i++) {
151         v     = aa + ai[i];
152         vi    = aj + ai[i];
153         nz    = diag[i] - ai[i];
154         s1    = b[i2];
155         while (nz--) {
156           idx  = (*vi++);
157           x1   = x[idx];
158           s1  -= v[0]*x1;
159           v   += 1;
160         }
161         x[i2]   = idiag[0]*s1;
162         idiag   += 1;
163         i2      += 1;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2];
174         x[i2]   = mdiag[0]*x1;
175         mdiag  += 1;
176         i2     += 1;
177       }
178       ierr = PetscLogFlops(m);CHKERRQ(ierr);
179     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
180       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
181     }
182     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
183       idiag   = a->idiag+a->mbs - 1;
184       i2      = m - 1;
185       x1      = x[i2];
186       x[i2]   = idiag[0]*x1;
187       idiag -= 1;
188       i2    -= 1;
189       for (i=m-2; i>=0; i--) {
190         v     = aa + (diag[i]+1);
191         vi    = aj + diag[i] + 1;
192         nz    = ai[i+1] - diag[i] - 1;
193         s1    = x[i2];
194         while (nz--) {
195           idx  = (*vi++);
196           x1   = x[idx];
197           s1  -= v[0]*x1;
198           v   += 1;
199         }
200         x[i2]   = idiag[0]*s1;
201         idiag   -= 2;
202         i2      -= 1;
203       }
204       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
205     }
206   } else {
207     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
208   }
209   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
210   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
211   PetscFunctionReturn(0);
212 }
213 
214 #undef __FUNCT__
215 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
216 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
217 {
218   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
219   PetscScalar        *x,x1,x2,s1,s2;
220   const PetscScalar  *b;
221   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
222   PetscErrorCode     ierr;
223   PetscInt           m = a->mbs,i,i2,nz,idx;
224   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
225 
226   PetscFunctionBegin;
227   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
228   its = its*lits;
229   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
230   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
231   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
232   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
233   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
234 
235   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
236 
237   diag  = a->diag;
238   idiag = a->idiag;
239   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
240   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
241 
242   if (flag & SOR_ZERO_INITIAL_GUESS) {
243     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
244       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
245       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
246       i2     = 2;
247       idiag += 4;
248       for (i=1; i<m; i++) {
249 	v     = aa + 4*ai[i];
250 	vi    = aj + ai[i];
251 	nz    = diag[i] - ai[i];
252 	s1    = b[i2]; s2 = b[i2+1];
253 	while (nz--) {
254 	  idx  = 2*(*vi++);
255 	  x1   = x[idx]; x2 = x[1+idx];
256 	  s1  -= v[0]*x1 + v[2]*x2;
257 	  s2  -= v[1]*x1 + v[3]*x2;
258 	  v   += 4;
259 	}
260 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
261 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
262         idiag   += 4;
263         i2      += 2;
264       }
265       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
266       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
267     }
268     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
269         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
270       i2    = 0;
271       mdiag = a->idiag+4*a->mbs;
272       for (i=0; i<m; i++) {
273         x1      = x[i2]; x2 = x[i2+1];
274         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
275         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
276         mdiag  += 4;
277         i2     += 2;
278       }
279       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
280     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
281       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
282     }
283     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
284       idiag   = a->idiag+4*a->mbs - 4;
285       i2      = 2*m - 2;
286       x1      = x[i2]; x2 = x[i2+1];
287       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
288       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
289       idiag -= 4;
290       i2    -= 2;
291       for (i=m-2; i>=0; i--) {
292 	v     = aa + 4*(diag[i]+1);
293 	vi    = aj + diag[i] + 1;
294 	nz    = ai[i+1] - diag[i] - 1;
295 	s1    = x[i2]; s2 = x[i2+1];
296 	while (nz--) {
297 	  idx  = 2*(*vi++);
298 	  x1   = x[idx]; x2 = x[1+idx];
299 	  s1  -= v[0]*x1 + v[2]*x2;
300 	  s2  -= v[1]*x1 + v[3]*x2;
301 	  v   += 4;
302 	}
303 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
304 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
305         idiag   -= 4;
306         i2      -= 2;
307       }
308       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
309     }
310   } else {
311     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
312   }
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
320 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
420     }
421   } else {
422     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
423   }
424   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
425   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
431 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
432 {
433   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
434   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
435   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
436   const PetscScalar  *b;
437   PetscErrorCode     ierr;
438   PetscInt           m = a->mbs,i,i2,nz,idx;
439   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
440 
441   PetscFunctionBegin;
442   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
443   its = its*lits;
444   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
445   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
446   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
447   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
448   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
449 
450   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
451 
452   diag  = a->diag;
453   idiag = a->idiag;
454   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
455   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
456 
457   if (flag & SOR_ZERO_INITIAL_GUESS) {
458     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
459       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
460       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
461       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
462       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
463       i2     = 4;
464       idiag += 16;
465       for (i=1; i<m; i++) {
466 	v     = aa + 16*ai[i];
467 	vi    = aj + ai[i];
468 	nz    = diag[i] - ai[i];
469 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
470 	while (nz--) {
471 	  idx  = 4*(*vi++);
472 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
473 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
474 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
475 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
476 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
477 	  v   += 16;
478 	}
479 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
480 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
481 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
482 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
483         idiag   += 16;
484         i2      += 4;
485       }
486       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
487       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
488     }
489     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
490         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
491       i2    = 0;
492       mdiag = a->idiag+16*a->mbs;
493       for (i=0; i<m; i++) {
494         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
495         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
496         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
497         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
498         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
499         mdiag  += 16;
500         i2     += 4;
501       }
502       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
503     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
504       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
505     }
506     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
507       idiag   = a->idiag+16*a->mbs - 16;
508       i2      = 4*m - 4;
509       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
510       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
511       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
512       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
513       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
514       idiag -= 16;
515       i2    -= 4;
516       for (i=m-2; i>=0; i--) {
517 	v     = aa + 16*(diag[i]+1);
518 	vi    = aj + diag[i] + 1;
519 	nz    = ai[i+1] - diag[i] - 1;
520 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
521 	while (nz--) {
522 	  idx  = 4*(*vi++);
523 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
524 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
525 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
526 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
527 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
528 	  v   += 16;
529 	}
530 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
531 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
532 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
533 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
534         idiag   -= 16;
535         i2      -= 4;
536       }
537       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
538     }
539   } else {
540     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
541   }
542   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
543   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
544   PetscFunctionReturn(0);
545 }
546 
547 #undef __FUNCT__
548 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
549 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
550 {
551   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
552   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
553   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
554   const PetscScalar  *b;
555   PetscErrorCode     ierr;
556   PetscInt           m = a->mbs,i,i2,nz,idx;
557   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
558 
559   PetscFunctionBegin;
560   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
561   its = its*lits;
562   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
563   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
564   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
565   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
566   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
567 
568   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
569 
570   diag  = a->diag;
571   idiag = a->idiag;
572   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
573   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
577       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
578       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
579       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
580       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
581       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
582       i2     = 5;
583       idiag += 25;
584       for (i=1; i<m; i++) {
585 	v     = aa + 25*ai[i];
586 	vi    = aj + ai[i];
587 	nz    = diag[i] - ai[i];
588 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
589 	while (nz--) {
590 	  idx  = 5*(*vi++);
591 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
592 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
593 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
594 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
595 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
596 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
597 	  v   += 25;
598 	}
599 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
600 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
601 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
602 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
603 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
604         idiag   += 25;
605         i2      += 5;
606       }
607       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
608       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
609     }
610     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
611         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
612       i2    = 0;
613       mdiag = a->idiag+25*a->mbs;
614       for (i=0; i<m; i++) {
615         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
616         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
617         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
618         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
619         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
620         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
621         mdiag  += 25;
622         i2     += 5;
623       }
624       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
625     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
626       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
627     }
628     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
629       idiag   = a->idiag+25*a->mbs - 25;
630       i2      = 5*m - 5;
631       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
632       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
633       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
634       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
635       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
636       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
637       idiag -= 25;
638       i2    -= 5;
639       for (i=m-2; i>=0; i--) {
640 	v     = aa + 25*(diag[i]+1);
641 	vi    = aj + diag[i] + 1;
642 	nz    = ai[i+1] - diag[i] - 1;
643 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
644 	while (nz--) {
645 	  idx  = 5*(*vi++);
646 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
647 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
648 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
649 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
650 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
651 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
652 	  v   += 25;
653 	}
654 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
655 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
656 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
657 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
658 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
659         idiag   -= 25;
660         i2      -= 5;
661       }
662       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
663     }
664   } else {
665     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
666   }
667   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
668   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
674 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
675 {
676   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
677   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
678   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
679   const PetscScalar  *b;
680   PetscErrorCode     ierr;
681   PetscInt           m = a->mbs,i,i2,nz,idx;
682   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
683 
684   PetscFunctionBegin;
685   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
686   its = its*lits;
687   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
688   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
689   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
690   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
691   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
692 
693   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
694 
695   diag  = a->diag;
696   idiag = a->idiag;
697   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
698   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
699 
700   if (flag & SOR_ZERO_INITIAL_GUESS) {
701     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
702       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
703       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
704       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
705       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
706       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
707       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
708       i2     = 6;
709       idiag += 36;
710       for (i=1; i<m; i++) {
711         v     = aa + 36*ai[i];
712         vi    = aj + ai[i];
713         nz    = diag[i] - ai[i];
714         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
715         while (nz--) {
716           idx  = 6*(*vi++);
717           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
718           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
719           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
720           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
721           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
722           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
723           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
724           v   += 36;
725         }
726         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
727         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
728         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
729         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
730         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
731         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
732         idiag   += 36;
733         i2      += 6;
734       }
735       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
736       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
737     }
738     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
739         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
740       i2    = 0;
741       mdiag = a->idiag+36*a->mbs;
742       for (i=0; i<m; i++) {
743         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
744         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
745         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
746         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
747         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
748         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
749         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
750         mdiag  += 36;
751         i2     += 6;
752       }
753       ierr = PetscLogFlops(60*m);CHKERRQ(ierr);
754     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
755       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
756     }
757     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
758       idiag   = a->idiag+36*a->mbs - 36;
759       i2      = 6*m - 6;
760       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
761       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
762       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
763       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
764       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
765       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
766       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
767       idiag -= 36;
768       i2    -= 6;
769       for (i=m-2; i>=0; i--) {
770         v     = aa + 36*(diag[i]+1);
771         vi    = aj + diag[i] + 1;
772         nz    = ai[i+1] - diag[i] - 1;
773         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
774         while (nz--) {
775           idx  = 6*(*vi++);
776           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
777           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
778           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
779           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
780           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
781           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
782           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
783           v   += 36;
784         }
785         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
786         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
787         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
788         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
789         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
790         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
791         idiag   -= 36;
792         i2      -= 6;
793       }
794       ierr = PetscLogFlops(36*(a->nz));CHKERRQ(ierr);
795     }
796   } else {
797     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
798   }
799   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
800   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
806 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
807 {
808   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
809   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
810   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
811   const PetscScalar  *b;
812   PetscErrorCode     ierr;
813   PetscInt           m = a->mbs,i,i2,nz,idx;
814   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
815 
816   PetscFunctionBegin;
817   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
818   its = its*lits;
819   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
821   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
822   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
823   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
824 
825   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
826 
827   diag  = a->diag;
828   idiag = a->idiag;
829   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
830   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
831 
832   if (flag & SOR_ZERO_INITIAL_GUESS) {
833     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
834       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
835       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
836       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
837       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
838       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
839       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
840       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
841       i2     = 7;
842       idiag += 49;
843       for (i=1; i<m; i++) {
844         v     = aa + 49*ai[i];
845         vi    = aj + ai[i];
846         nz    = diag[i] - ai[i];
847         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
848         while (nz--) {
849           idx  = 7*(*vi++);
850           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
851           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
852           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
853           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
854           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
855           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
856           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
857           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
858           v   += 49;
859         }
860         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
861         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
862         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
863         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
864         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
865         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
866         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
867         idiag   += 49;
868         i2      += 7;
869       }
870       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
871       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
872     }
873     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
874         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
875       i2    = 0;
876       mdiag = a->idiag+49*a->mbs;
877       for (i=0; i<m; i++) {
878         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
879         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
880         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
881         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
882         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
883         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
884         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
885         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
886         mdiag  += 36;
887         i2     += 6;
888       }
889       ierr = PetscLogFlops(93*m);CHKERRQ(ierr);
890     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
891       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
892     }
893     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
894       idiag   = a->idiag+49*a->mbs - 49;
895       i2      = 7*m - 7;
896       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
897       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
898       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
899       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
900       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
901       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
902       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
903       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
904       idiag -= 49;
905       i2    -= 7;
906       for (i=m-2; i>=0; i--) {
907         v     = aa + 49*(diag[i]+1);
908         vi    = aj + diag[i] + 1;
909         nz    = ai[i+1] - diag[i] - 1;
910         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
911         while (nz--) {
912           idx  = 7*(*vi++);
913           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
914           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
915           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
916           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
917           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
918           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
919           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
920           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
921           v   += 49;
922         }
923         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
924         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
925         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
926         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
927         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
928         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
929         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
930         idiag   -= 49;
931         i2      -= 7;
932       }
933       ierr = PetscLogFlops(49*(a->nz));CHKERRQ(ierr);
934     }
935   } else {
936     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
937   }
938   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
939   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
940   PetscFunctionReturn(0);
941 }
942 
943 /*
944     Special version for direct calls from Fortran (Used in PETSc-fun3d)
945 */
946 #if defined(PETSC_HAVE_FORTRAN_CAPS)
947 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
948 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
949 #define matsetvaluesblocked4_ matsetvaluesblocked4
950 #endif
951 
952 EXTERN_C_BEGIN
953 #undef __FUNCT__
954 #define __FUNCT__ "matsetvaluesblocked4_"
955 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
956 {
957   Mat               A = *AA;
958   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
959   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
960   PetscInt          *ai=a->i,*ailen=a->ilen;
961   PetscInt          *aj=a->j,stepval,lastcol = -1;
962   const PetscScalar *value = v;
963   MatScalar         *ap,*aa = a->a,*bap;
964 
965   PetscFunctionBegin;
966   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
967   stepval = (n-1)*4;
968   for (k=0; k<m; k++) { /* loop over added rows */
969     row  = im[k];
970     rp   = aj + ai[row];
971     ap   = aa + 16*ai[row];
972     nrow = ailen[row];
973     low  = 0;
974     high = nrow;
975     for (l=0; l<n; l++) { /* loop over added columns */
976       col = in[l];
977       if (col <= lastcol) low = 0; else high = nrow;
978       lastcol = col;
979       value = v + k*(stepval+4 + l)*4;
980       while (high-low > 7) {
981         t = (low+high)/2;
982         if (rp[t] > col) high = t;
983         else             low  = t;
984       }
985       for (i=low; i<high; i++) {
986         if (rp[i] > col) break;
987         if (rp[i] == col) {
988           bap  = ap +  16*i;
989           for (ii=0; ii<4; ii++,value+=stepval) {
990             for (jj=ii; jj<16; jj+=4) {
991               bap[jj] += *value++;
992             }
993           }
994           goto noinsert2;
995         }
996       }
997       N = nrow++ - 1;
998       high++; /* added new column index thus must search to one higher than before */
999       /* shift up all the later entries in this row */
1000       for (ii=N; ii>=i; ii--) {
1001         rp[ii+1] = rp[ii];
1002         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1003       }
1004       if (N >= i) {
1005         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1006       }
1007       rp[i] = col;
1008       bap   = ap +  16*i;
1009       for (ii=0; ii<4; ii++,value+=stepval) {
1010         for (jj=ii; jj<16; jj+=4) {
1011           bap[jj] = *value++;
1012         }
1013       }
1014       noinsert2:;
1015       low = i;
1016     }
1017     ailen[row] = nrow;
1018   }
1019   PetscFunctionReturnVoid();
1020 }
1021 EXTERN_C_END
1022 
1023 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1024 #define matsetvalues4_ MATSETVALUES4
1025 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1026 #define matsetvalues4_ matsetvalues4
1027 #endif
1028 
1029 EXTERN_C_BEGIN
1030 #undef __FUNCT__
1031 #define __FUNCT__ "MatSetValues4_"
1032 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1033 {
1034   Mat         A = *AA;
1035   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1036   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1037   PetscInt    *ai=a->i,*ailen=a->ilen;
1038   PetscInt    *aj=a->j,brow,bcol;
1039   PetscInt    ridx,cidx,lastcol = -1;
1040   MatScalar   *ap,value,*aa=a->a,*bap;
1041 
1042   PetscFunctionBegin;
1043   for (k=0; k<m; k++) { /* loop over added rows */
1044     row  = im[k]; brow = row/4;
1045     rp   = aj + ai[brow];
1046     ap   = aa + 16*ai[brow];
1047     nrow = ailen[brow];
1048     low  = 0;
1049     high = nrow;
1050     for (l=0; l<n; l++) { /* loop over added columns */
1051       col = in[l]; bcol = col/4;
1052       ridx = row % 4; cidx = col % 4;
1053       value = v[l + k*n];
1054       if (col <= lastcol) low = 0; else high = nrow;
1055       lastcol = col;
1056       while (high-low > 7) {
1057         t = (low+high)/2;
1058         if (rp[t] > bcol) high = t;
1059         else              low  = t;
1060       }
1061       for (i=low; i<high; i++) {
1062         if (rp[i] > bcol) break;
1063         if (rp[i] == bcol) {
1064           bap  = ap +  16*i + 4*cidx + ridx;
1065           *bap += value;
1066           goto noinsert1;
1067         }
1068       }
1069       N = nrow++ - 1;
1070       high++; /* added new column thus must search to one higher than before */
1071       /* shift up all the later entries in this row */
1072       for (ii=N; ii>=i; ii--) {
1073         rp[ii+1] = rp[ii];
1074         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1075       }
1076       if (N>=i) {
1077         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1078       }
1079       rp[i]                    = bcol;
1080       ap[16*i + 4*cidx + ridx] = value;
1081       noinsert1:;
1082       low = i;
1083     }
1084     ailen[brow] = nrow;
1085   }
1086   PetscFunctionReturnVoid();
1087 }
1088 EXTERN_C_END
1089 
1090 /*
1091      Checks for missing diagonals
1092 */
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1095 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1096 {
1097   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099   PetscInt       *diag,*jj = a->j,i;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1103   *missing = PETSC_FALSE;
1104   diag     = a->diag;
1105   for (i=0; i<a->mbs; i++) {
1106     if (jj[diag[i]] != i) {
1107       *missing  = PETSC_TRUE;
1108       if (d) *d = i;
1109     }
1110   }
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1116 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1117 {
1118   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120   PetscInt       i,j,m = a->mbs;
1121 
1122   PetscFunctionBegin;
1123   if (!a->diag) {
1124     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1125   }
1126   for (i=0; i<m; i++) {
1127     a->diag[i] = a->i[i+1];
1128     for (j=a->i[i]; j<a->i[i+1]; j++) {
1129       if (a->j[j] == i) {
1130         a->diag[i] = j;
1131         break;
1132       }
1133     }
1134   }
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 
1139 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1143 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1144 {
1145   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1146   PetscErrorCode ierr;
1147   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
1148   PetscInt       *tia, *tja;
1149 
1150   PetscFunctionBegin;
1151   *nn = n;
1152   if (!ia) PetscFunctionReturn(0);
1153   if (symmetric) {
1154     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1155   } else {
1156     tia = a->i; tja = a->j;
1157   }
1158 
1159   if (!blockcompressed && bs > 1) {
1160     (*nn) *= bs;
1161     nbs    = bs;
1162     /* malloc & create the natural set of indices */
1163     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1164     if (n) {
1165       (*ia)[0] = 0;
1166       for (j=1; j<bs; j++) {
1167         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1168       }
1169     }
1170 
1171     for (i=1; i<n; i++) {
1172       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1173       for (j=1; j<bs; j++) {
1174         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1175       }
1176     }
1177     if (n) {
1178       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1179     }
1180 
1181     if (ja) {
1182       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1183       cnt = 0;
1184       for (i=0; i<n; i++) {
1185         for (j=0; j<bs; j++) {
1186           for (k=tia[i]; k<tia[i+1]; k++) {
1187             for (l=0; l<bs; l++) {
1188               (*ja)[cnt++] = bs*tja[k] + l;
1189 	    }
1190           }
1191         }
1192       }
1193     }
1194 
1195     n     *= bs;
1196     nz *= bs*bs;
1197     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1198       ierr = PetscFree(tia);CHKERRQ(ierr);
1199       ierr = PetscFree(tja);CHKERRQ(ierr);
1200     }
1201   } else {
1202     *ia = tia;
1203     if (ja) *ja = tja;
1204   }
1205   if (oshift == 1) {
1206     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1207     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1208   }
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 #undef __FUNCT__
1213 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1214 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1215 {
1216   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1217   PetscErrorCode ierr;
1218   PetscInt       i,n = a->mbs,nz = a->i[n];
1219 
1220   PetscFunctionBegin;
1221   if (!ia) PetscFunctionReturn(0);
1222   if (!blockcompressed && A->rmap.bs > 1) {
1223     ierr = PetscFree(*ia);CHKERRQ(ierr);
1224     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1225   } else if (symmetric) {
1226     ierr = PetscFree(*ia);CHKERRQ(ierr);
1227     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1228   } else if (oshift == 1) { /* blockcompressed */
1229     for (i=0; i<n+1; i++) a->i[i]--;
1230     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1231   }
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1237 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243 #if defined(PETSC_USE_LOG)
1244   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
1245 #endif
1246   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1247   if (a->row) {
1248     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1249   }
1250   if (a->col) {
1251     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1252   }
1253   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1254   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1255   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1256   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1257   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1258   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1259   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1260   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1261   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1262 
1263   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1264   ierr = PetscFree(a);CHKERRQ(ierr);
1265 
1266   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1267   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1268   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1269   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1270   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1271   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1272   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1273   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1274   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1280 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284 
1285   PetscFunctionBegin;
1286   switch (op) {
1287   case MAT_ROW_ORIENTED:
1288     a->roworiented    = flg;
1289     break;
1290   case MAT_KEEP_ZEROED_ROWS:
1291     a->keepzeroedrows = flg;
1292     break;
1293   case MAT_NEW_NONZERO_LOCATIONS:
1294     a->nonew          = (flg ? 0 : 1);
1295     break;
1296   case MAT_NEW_NONZERO_LOCATION_ERR:
1297     a->nonew          = (flg ? -1 : 0);
1298     break;
1299   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1300     a->nonew          = (flg ? -2 : 0);
1301     break;
1302   case MAT_NEW_DIAGONALS:
1303   case MAT_IGNORE_OFF_PROC_ENTRIES:
1304   case MAT_USE_HASH_TABLE:
1305     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1306     break;
1307   case MAT_SYMMETRIC:
1308   case MAT_STRUCTURALLY_SYMMETRIC:
1309   case MAT_HERMITIAN:
1310   case MAT_SYMMETRY_ETERNAL:
1311     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1312     break;
1313   default:
1314     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1315   }
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1326   MatScalar      *aa,*aa_i;
1327   PetscScalar    *v_i;
1328 
1329   PetscFunctionBegin;
1330   bs  = A->rmap.bs;
1331   ai  = a->i;
1332   aj  = a->j;
1333   aa  = a->a;
1334   bs2 = a->bs2;
1335 
1336   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1337 
1338   bn  = row/bs;   /* Block number */
1339   bp  = row % bs; /* Block Position */
1340   M   = ai[bn+1] - ai[bn];
1341   *nz = bs*M;
1342 
1343   if (v) {
1344     *v = 0;
1345     if (*nz) {
1346       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1347       for (i=0; i<M; i++) { /* for each block in the block row */
1348         v_i  = *v + i*bs;
1349         aa_i = aa + bs2*(ai[bn] + i);
1350         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1351       }
1352     }
1353   }
1354 
1355   if (idx) {
1356     *idx = 0;
1357     if (*nz) {
1358       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1359       for (i=0; i<M; i++) { /* for each block in the block row */
1360         idx_i = *idx + i*bs;
1361         itmp  = bs*aj[ai[bn] + i];
1362         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1363       }
1364     }
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1371 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1377   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 #undef __FUNCT__
1382 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1383 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1384 {
1385   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1386   Mat            C;
1387   PetscErrorCode ierr;
1388   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1389   PetscInt       *rows,*cols,bs2=a->bs2;
1390   MatScalar      *array;
1391 
1392   PetscFunctionBegin;
1393   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1394   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1395     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1396     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1397 
1398     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1399     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1400     ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1401     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1402     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1403     ierr = PetscFree(col);CHKERRQ(ierr);
1404   } else {
1405     C = *B;
1406   }
1407 
1408   array = a->a;
1409   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1410   cols = rows + bs;
1411   for (i=0; i<mbs; i++) {
1412     cols[0] = i*bs;
1413     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1414     len = ai[i+1] - ai[i];
1415     for (j=0; j<len; j++) {
1416       rows[0] = (*aj++)*bs;
1417       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1418       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1419       array += bs2;
1420     }
1421   }
1422   ierr = PetscFree(rows);CHKERRQ(ierr);
1423 
1424   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1425   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1426 
1427   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1428     *B = C;
1429   } else {
1430     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1437 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1438 {
1439   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1440   PetscErrorCode ierr;
1441   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1442   int            fd;
1443   PetscScalar    *aa;
1444   FILE           *file;
1445 
1446   PetscFunctionBegin;
1447   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1448   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1449   col_lens[0] = MAT_FILE_COOKIE;
1450 
1451   col_lens[1] = A->rmap.N;
1452   col_lens[2] = A->cmap.n;
1453   col_lens[3] = a->nz*bs2;
1454 
1455   /* store lengths of each row and write (including header) to file */
1456   count = 0;
1457   for (i=0; i<a->mbs; i++) {
1458     for (j=0; j<bs; j++) {
1459       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1460     }
1461   }
1462   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1463   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1464 
1465   /* store column indices (zero start index) */
1466   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1467   count = 0;
1468   for (i=0; i<a->mbs; i++) {
1469     for (j=0; j<bs; j++) {
1470       for (k=a->i[i]; k<a->i[i+1]; k++) {
1471         for (l=0; l<bs; l++) {
1472           jj[count++] = bs*a->j[k] + l;
1473         }
1474       }
1475     }
1476   }
1477   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1478   ierr = PetscFree(jj);CHKERRQ(ierr);
1479 
1480   /* store nonzero values */
1481   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1482   count = 0;
1483   for (i=0; i<a->mbs; i++) {
1484     for (j=0; j<bs; j++) {
1485       for (k=a->i[i]; k<a->i[i+1]; k++) {
1486         for (l=0; l<bs; l++) {
1487           aa[count++] = a->a[bs2*k + l*bs + j];
1488         }
1489       }
1490     }
1491   }
1492   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1493   ierr = PetscFree(aa);CHKERRQ(ierr);
1494 
1495   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1496   if (file) {
1497     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1504 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1505 {
1506   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1507   PetscErrorCode    ierr;
1508   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1509   PetscViewerFormat format;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1513   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1514     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1515   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1516     Mat aij;
1517     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1518     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1519     ierr = MatDestroy(aij);CHKERRQ(ierr);
1520   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1521      PetscFunctionReturn(0);
1522   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1523     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1524     for (i=0; i<a->mbs; i++) {
1525       for (j=0; j<bs; j++) {
1526         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1527         for (k=a->i[i]; k<a->i[i+1]; k++) {
1528           for (l=0; l<bs; l++) {
1529 #if defined(PETSC_USE_COMPLEX)
1530             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1532                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1535                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             }
1539 #else
1540             if (a->a[bs2*k + l*bs + j] != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1542             }
1543 #endif
1544           }
1545         }
1546         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1547       }
1548     }
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1550   } else {
1551     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1552     for (i=0; i<a->mbs; i++) {
1553       for (j=0; j<bs; j++) {
1554         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1555         for (k=a->i[i]; k<a->i[i+1]; k++) {
1556           for (l=0; l<bs; l++) {
1557 #if defined(PETSC_USE_COMPLEX)
1558             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1560                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1563                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             }
1567 #else
1568             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1569 #endif
1570           }
1571         }
1572         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1573       }
1574     }
1575     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1576   }
1577   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1578   PetscFunctionReturn(0);
1579 }
1580 
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1583 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1584 {
1585   Mat            A = (Mat) Aa;
1586   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1587   PetscErrorCode ierr;
1588   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1589   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1590   MatScalar      *aa;
1591   PetscViewer    viewer;
1592 
1593   PetscFunctionBegin;
1594 
1595   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1596   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1597 
1598   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1599 
1600   /* loop over matrix elements drawing boxes */
1601   color = PETSC_DRAW_BLUE;
1602   for (i=0,row=0; i<mbs; i++,row+=bs) {
1603     for (j=a->i[i]; j<a->i[i+1]; j++) {
1604       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1605       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606       aa = a->a + j*bs2;
1607       for (k=0; k<bs; k++) {
1608         for (l=0; l<bs; l++) {
1609           if (PetscRealPart(*aa++) >=  0.) continue;
1610           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611         }
1612       }
1613     }
1614   }
1615   color = PETSC_DRAW_CYAN;
1616   for (i=0,row=0; i<mbs; i++,row+=bs) {
1617     for (j=a->i[i]; j<a->i[i+1]; j++) {
1618       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1619       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620       aa = a->a + j*bs2;
1621       for (k=0; k<bs; k++) {
1622         for (l=0; l<bs; l++) {
1623           if (PetscRealPart(*aa++) != 0.) continue;
1624           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625         }
1626       }
1627     }
1628   }
1629 
1630   color = PETSC_DRAW_RED;
1631   for (i=0,row=0; i<mbs; i++,row+=bs) {
1632     for (j=a->i[i]; j<a->i[i+1]; j++) {
1633       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1634       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635       aa = a->a + j*bs2;
1636       for (k=0; k<bs; k++) {
1637         for (l=0; l<bs; l++) {
1638           if (PetscRealPart(*aa++) <= 0.) continue;
1639           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640         }
1641       }
1642     }
1643   }
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1649 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1650 {
1651   PetscErrorCode ierr;
1652   PetscReal      xl,yl,xr,yr,w,h;
1653   PetscDraw      draw;
1654   PetscTruth     isnull;
1655 
1656   PetscFunctionBegin;
1657 
1658   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1659   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1660 
1661   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1662   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1663   xr += w;    yr += h;  xl = -w;     yl = -h;
1664   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1665   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1666   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1667   PetscFunctionReturn(0);
1668 }
1669 
1670 #undef __FUNCT__
1671 #define __FUNCT__ "MatView_SeqBAIJ"
1672 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1673 {
1674   PetscErrorCode ierr;
1675   PetscTruth     iascii,isbinary,isdraw;
1676 
1677   PetscFunctionBegin;
1678   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1681   if (iascii){
1682     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1683   } else if (isbinary) {
1684     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1685   } else if (isdraw) {
1686     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1687   } else {
1688     Mat B;
1689     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1690     ierr = MatView(B,viewer);CHKERRQ(ierr);
1691     ierr = MatDestroy(B);CHKERRQ(ierr);
1692   }
1693   PetscFunctionReturn(0);
1694 }
1695 
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1699 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1700 {
1701   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1702   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1703   PetscInt    *ai = a->i,*ailen = a->ilen;
1704   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1705   MatScalar   *ap,*aa = a->a;
1706 
1707   PetscFunctionBegin;
1708   for (k=0; k<m; k++) { /* loop over rows */
1709     row  = im[k]; brow = row/bs;
1710     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1711     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1712     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1713     nrow = ailen[brow];
1714     for (l=0; l<n; l++) { /* loop over columns */
1715       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1716       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1717       col  = in[l] ;
1718       bcol = col/bs;
1719       cidx = col%bs;
1720       ridx = row%bs;
1721       high = nrow;
1722       low  = 0; /* assume unsorted */
1723       while (high-low > 5) {
1724         t = (low+high)/2;
1725         if (rp[t] > bcol) high = t;
1726         else             low  = t;
1727       }
1728       for (i=low; i<high; i++) {
1729         if (rp[i] > bcol) break;
1730         if (rp[i] == bcol) {
1731           *v++ = ap[bs2*i+bs*cidx+ridx];
1732           goto finished;
1733         }
1734       }
1735       *v++ = 0.0;
1736       finished:;
1737     }
1738   }
1739   PetscFunctionReturn(0);
1740 }
1741 
1742 #define CHUNKSIZE 10
1743 #undef __FUNCT__
1744 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1745 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1746 {
1747   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1748   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1749   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1750   PetscErrorCode    ierr;
1751   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1752   PetscTruth        roworiented=a->roworiented;
1753   const PetscScalar *value = v;
1754   MatScalar         *ap,*aa = a->a,*bap;
1755 
1756   PetscFunctionBegin;
1757   if (roworiented) {
1758     stepval = (n-1)*bs;
1759   } else {
1760     stepval = (m-1)*bs;
1761   }
1762   for (k=0; k<m; k++) { /* loop over added rows */
1763     row  = im[k];
1764     if (row < 0) continue;
1765 #if defined(PETSC_USE_DEBUG)
1766     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1767 #endif
1768     rp   = aj + ai[row];
1769     ap   = aa + bs2*ai[row];
1770     rmax = imax[row];
1771     nrow = ailen[row];
1772     low  = 0;
1773     high = nrow;
1774     for (l=0; l<n; l++) { /* loop over added columns */
1775       if (in[l] < 0) continue;
1776 #if defined(PETSC_USE_DEBUG)
1777       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1778 #endif
1779       col = in[l];
1780       if (roworiented) {
1781         value = v + k*(stepval+bs)*bs + l*bs;
1782       } else {
1783         value = v + l*(stepval+bs)*bs + k*bs;
1784       }
1785       if (col <= lastcol) low = 0; else high = nrow;
1786       lastcol = col;
1787       while (high-low > 7) {
1788         t = (low+high)/2;
1789         if (rp[t] > col) high = t;
1790         else             low  = t;
1791       }
1792       for (i=low; i<high; i++) {
1793         if (rp[i] > col) break;
1794         if (rp[i] == col) {
1795           bap  = ap +  bs2*i;
1796           if (roworiented) {
1797             if (is == ADD_VALUES) {
1798               for (ii=0; ii<bs; ii++,value+=stepval) {
1799                 for (jj=ii; jj<bs2; jj+=bs) {
1800                   bap[jj] += *value++;
1801                 }
1802               }
1803             } else {
1804               for (ii=0; ii<bs; ii++,value+=stepval) {
1805                 for (jj=ii; jj<bs2; jj+=bs) {
1806                   bap[jj] = *value++;
1807                 }
1808               }
1809             }
1810           } else {
1811             if (is == ADD_VALUES) {
1812               for (ii=0; ii<bs; ii++,value+=stepval) {
1813                 for (jj=0; jj<bs; jj++) {
1814                   *bap++ += *value++;
1815                 }
1816               }
1817             } else {
1818               for (ii=0; ii<bs; ii++,value+=stepval) {
1819                 for (jj=0; jj<bs; jj++) {
1820                   *bap++  = *value++;
1821                 }
1822               }
1823             }
1824           }
1825           goto noinsert2;
1826         }
1827       }
1828       if (nonew == 1) goto noinsert2;
1829       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1830       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1831       N = nrow++ - 1; high++;
1832       /* shift up all the later entries in this row */
1833       for (ii=N; ii>=i; ii--) {
1834         rp[ii+1] = rp[ii];
1835         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1836       }
1837       if (N >= i) {
1838         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1839       }
1840       rp[i] = col;
1841       bap   = ap +  bs2*i;
1842       if (roworiented) {
1843         for (ii=0; ii<bs; ii++,value+=stepval) {
1844           for (jj=ii; jj<bs2; jj+=bs) {
1845             bap[jj] = *value++;
1846           }
1847         }
1848       } else {
1849         for (ii=0; ii<bs; ii++,value+=stepval) {
1850           for (jj=0; jj<bs; jj++) {
1851             *bap++  = *value++;
1852           }
1853         }
1854       }
1855       noinsert2:;
1856       low = i;
1857     }
1858     ailen[row] = nrow;
1859   }
1860   PetscFunctionReturn(0);
1861 }
1862 
1863 #undef __FUNCT__
1864 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1865 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1866 {
1867   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1868   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1869   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1870   PetscErrorCode ierr;
1871   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1872   MatScalar      *aa = a->a,*ap;
1873   PetscReal      ratio=0.6;
1874 
1875   PetscFunctionBegin;
1876   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1877 
1878   if (m) rmax = ailen[0];
1879   for (i=1; i<mbs; i++) {
1880     /* move each row back by the amount of empty slots (fshift) before it*/
1881     fshift += imax[i-1] - ailen[i-1];
1882     rmax   = PetscMax(rmax,ailen[i]);
1883     if (fshift) {
1884       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1885       N = ailen[i];
1886       for (j=0; j<N; j++) {
1887         ip[j-fshift] = ip[j];
1888         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1889       }
1890     }
1891     ai[i] = ai[i-1] + ailen[i-1];
1892   }
1893   if (mbs) {
1894     fshift += imax[mbs-1] - ailen[mbs-1];
1895     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1896   }
1897   /* reset ilen and imax for each row */
1898   for (i=0; i<mbs; i++) {
1899     ailen[i] = imax[i] = ai[i+1] - ai[i];
1900   }
1901   a->nz = ai[mbs];
1902 
1903   /* diagonals may have moved, so kill the diagonal pointers */
1904   a->idiagvalid = PETSC_FALSE;
1905   if (fshift && a->diag) {
1906     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1907     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1908     a->diag = 0;
1909   }
1910   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1911   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1912   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1913   a->reallocs          = 0;
1914   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1915 
1916   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1917   if (a->compressedrow.use){
1918     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1919   }
1920 
1921   A->same_nonzero = PETSC_TRUE;
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 /*
1926    This function returns an array of flags which indicate the locations of contiguous
1927    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1928    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1929    Assume: sizes should be long enough to hold all the values.
1930 */
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1933 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1934 {
1935   PetscInt   i,j,k,row;
1936   PetscTruth flg;
1937 
1938   PetscFunctionBegin;
1939   for (i=0,j=0; i<n; j++) {
1940     row = idx[i];
1941     if (row%bs!=0) { /* Not the begining of a block */
1942       sizes[j] = 1;
1943       i++;
1944     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1945       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1946       i++;
1947     } else { /* Begining of the block, so check if the complete block exists */
1948       flg = PETSC_TRUE;
1949       for (k=1; k<bs; k++) {
1950         if (row+k != idx[i+k]) { /* break in the block */
1951           flg = PETSC_FALSE;
1952           break;
1953         }
1954       }
1955       if (flg) { /* No break in the bs */
1956         sizes[j] = bs;
1957         i+= bs;
1958       } else {
1959         sizes[j] = 1;
1960         i++;
1961       }
1962     }
1963   }
1964   *bs_max = j;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1970 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1971 {
1972   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1973   PetscErrorCode ierr;
1974   PetscInt       i,j,k,count,*rows;
1975   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1976   PetscScalar    zero = 0.0;
1977   MatScalar      *aa;
1978 
1979   PetscFunctionBegin;
1980   /* Make a copy of the IS and  sort it */
1981   /* allocate memory for rows,sizes */
1982   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1983   sizes = rows + is_n;
1984 
1985   /* copy IS values to rows, and sort them */
1986   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1987   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1988   if (baij->keepzeroedrows) {
1989     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1990     bs_max = is_n;
1991     A->same_nonzero = PETSC_TRUE;
1992   } else {
1993     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1994     A->same_nonzero = PETSC_FALSE;
1995   }
1996 
1997   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1998     row   = rows[j];
1999     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2000     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2001     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2002     if (sizes[i] == bs && !baij->keepzeroedrows) {
2003       if (diag != 0.0) {
2004         if (baij->ilen[row/bs] > 0) {
2005           baij->ilen[row/bs]       = 1;
2006           baij->j[baij->i[row/bs]] = row/bs;
2007           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2008         }
2009         /* Now insert all the diagonal values for this bs */
2010         for (k=0; k<bs; k++) {
2011           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2012         }
2013       } else { /* (diag == 0.0) */
2014         baij->ilen[row/bs] = 0;
2015       } /* end (diag == 0.0) */
2016     } else { /* (sizes[i] != bs) */
2017 #if defined (PETSC_USE_DEBUG)
2018       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2019 #endif
2020       for (k=0; k<count; k++) {
2021         aa[0] =  zero;
2022         aa    += bs;
2023       }
2024       if (diag != 0.0) {
2025         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2026       }
2027     }
2028   }
2029 
2030   ierr = PetscFree(rows);CHKERRQ(ierr);
2031   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2037 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2038 {
2039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2040   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2041   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2042   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
2043   PetscErrorCode ierr;
2044   PetscInt       ridx,cidx,bs2=a->bs2;
2045   PetscTruth     roworiented=a->roworiented;
2046   MatScalar      *ap,value,*aa=a->a,*bap;
2047 
2048   PetscFunctionBegin;
2049   for (k=0; k<m; k++) { /* loop over added rows */
2050     row  = im[k];
2051     brow = row/bs;
2052     if (row < 0) continue;
2053 #if defined(PETSC_USE_DEBUG)
2054     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
2055 #endif
2056     rp   = aj + ai[brow];
2057     ap   = aa + bs2*ai[brow];
2058     rmax = imax[brow];
2059     nrow = ailen[brow];
2060     low  = 0;
2061     high = nrow;
2062     for (l=0; l<n; l++) { /* loop over added columns */
2063       if (in[l] < 0) continue;
2064 #if defined(PETSC_USE_DEBUG)
2065       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
2066 #endif
2067       col = in[l]; bcol = col/bs;
2068       ridx = row % bs; cidx = col % bs;
2069       if (roworiented) {
2070         value = v[l + k*n];
2071       } else {
2072         value = v[k + l*m];
2073       }
2074       if (col <= lastcol) low = 0; else high = nrow;
2075       lastcol = col;
2076       while (high-low > 7) {
2077         t = (low+high)/2;
2078         if (rp[t] > bcol) high = t;
2079         else              low  = t;
2080       }
2081       for (i=low; i<high; i++) {
2082         if (rp[i] > bcol) break;
2083         if (rp[i] == bcol) {
2084           bap  = ap +  bs2*i + bs*cidx + ridx;
2085           if (is == ADD_VALUES) *bap += value;
2086           else                  *bap  = value;
2087           goto noinsert1;
2088         }
2089       }
2090       if (nonew == 1) goto noinsert1;
2091       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2092       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2093       N = nrow++ - 1; high++;
2094       /* shift up all the later entries in this row */
2095       for (ii=N; ii>=i; ii--) {
2096         rp[ii+1] = rp[ii];
2097         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2098       }
2099       if (N>=i) {
2100         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2101       }
2102       rp[i]                      = bcol;
2103       ap[bs2*i + bs*cidx + ridx] = value;
2104       a->nz++;
2105       noinsert1:;
2106       low = i;
2107     }
2108     ailen[brow] = nrow;
2109   }
2110   A->same_nonzero = PETSC_FALSE;
2111   PetscFunctionReturn(0);
2112 }
2113 
2114 
2115 #undef __FUNCT__
2116 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2117 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
2118 {
2119   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2120   Mat            outA;
2121   PetscErrorCode ierr;
2122   PetscTruth     row_identity,col_identity;
2123 
2124   PetscFunctionBegin;
2125   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2126   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2127   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2128   if (!row_identity || !col_identity) {
2129     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2130   }
2131 
2132   outA          = inA;
2133   inA->factor   = MAT_FACTOR_LU;
2134 
2135   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2136 
2137   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2138   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2139   a->row = row;
2140   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2141   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2142   a->col = col;
2143 
2144   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2145   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2147 
2148   /*
2149       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
2150       for ILU(0) factorization with natural ordering
2151   */
2152   if (inA->rmap.bs < 8) {
2153     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
2154   } else {
2155     if (!a->solve_work) {
2156       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2157       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2158     }
2159   }
2160 
2161   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
2162 
2163   PetscFunctionReturn(0);
2164 }
2165 
2166 EXTERN_C_BEGIN
2167 #undef __FUNCT__
2168 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2169 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2170 {
2171   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2172   PetscInt    i,nz,nbs;
2173 
2174   PetscFunctionBegin;
2175   nz  = baij->maxnz/baij->bs2;
2176   nbs = baij->nbs;
2177   for (i=0; i<nz; i++) {
2178     baij->j[i] = indices[i];
2179   }
2180   baij->nz = nz;
2181   for (i=0; i<nbs; i++) {
2182     baij->ilen[i] = baij->imax[i];
2183   }
2184 
2185   PetscFunctionReturn(0);
2186 }
2187 EXTERN_C_END
2188 
2189 #undef __FUNCT__
2190 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2191 /*@
2192     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2193        in the matrix.
2194 
2195   Input Parameters:
2196 +  mat - the SeqBAIJ matrix
2197 -  indices - the column indices
2198 
2199   Level: advanced
2200 
2201   Notes:
2202     This can be called if you have precomputed the nonzero structure of the
2203   matrix and want to provide it to the matrix object to improve the performance
2204   of the MatSetValues() operation.
2205 
2206     You MUST have set the correct numbers of nonzeros per row in the call to
2207   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2208 
2209     MUST be called before any calls to MatSetValues();
2210 
2211 @*/
2212 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2213 {
2214   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2215 
2216   PetscFunctionBegin;
2217   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2218   PetscValidPointer(indices,2);
2219   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2220   if (f) {
2221     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2222   } else {
2223     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2224   }
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2230 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2231 {
2232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2233   PetscErrorCode ierr;
2234   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2235   PetscReal      atmp;
2236   PetscScalar    *x,zero = 0.0;
2237   MatScalar      *aa;
2238   PetscInt       ncols,brow,krow,kcol;
2239 
2240   PetscFunctionBegin;
2241   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2242   bs   = A->rmap.bs;
2243   aa   = a->a;
2244   ai   = a->i;
2245   aj   = a->j;
2246   mbs  = a->mbs;
2247 
2248   ierr = VecSet(v,zero);CHKERRQ(ierr);
2249   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2250   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2251   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2252   for (i=0; i<mbs; i++) {
2253     ncols = ai[1] - ai[0]; ai++;
2254     brow  = bs*i;
2255     for (j=0; j<ncols; j++){
2256       for (kcol=0; kcol<bs; kcol++){
2257         for (krow=0; krow<bs; krow++){
2258           atmp = PetscAbsScalar(*aa);aa++;
2259           row = brow + krow;    /* row index */
2260           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2261           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2262         }
2263       }
2264       aj++;
2265     }
2266   }
2267   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatCopy_SeqBAIJ"
2273 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2274 {
2275   PetscErrorCode ierr;
2276 
2277   PetscFunctionBegin;
2278   /* If the two matrices have the same copy implementation, use fast copy. */
2279   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2280     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2281     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2282 
2283     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
2284       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2285     }
2286     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
2287   } else {
2288     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 #undef __FUNCT__
2294 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2295 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2296 {
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2301   PetscFunctionReturn(0);
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2306 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2307 {
2308   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2309   PetscFunctionBegin;
2310   *array = a->a;
2311   PetscFunctionReturn(0);
2312 }
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2316 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2317 {
2318   PetscFunctionBegin;
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #include "petscblaslapack.h"
2323 #undef __FUNCT__
2324 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2325 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2326 {
2327   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2328   PetscErrorCode ierr;
2329   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2330   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2331 
2332   PetscFunctionBegin;
2333   if (str == SAME_NONZERO_PATTERN) {
2334     PetscScalar alpha = a;
2335     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2336   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2337     if (y->xtoy && y->XtoY != X) {
2338       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2339       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2340     }
2341     if (!y->xtoy) { /* get xtoy */
2342       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2343       y->XtoY = X;
2344     }
2345     bs2 = bs*bs;
2346     for (i=0; i<x->nz; i++) {
2347       j = 0;
2348       while (j < bs2){
2349         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2350         j++;
2351       }
2352     }
2353     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2354   } else {
2355     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2356   }
2357   PetscFunctionReturn(0);
2358 }
2359 
2360 #undef __FUNCT__
2361 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2362 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2363 {
2364   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2365   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2366   MatScalar      *aa = a->a;
2367 
2368   PetscFunctionBegin;
2369   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2375 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2376 {
2377   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2378   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2379   MatScalar      *aa = a->a;
2380 
2381   PetscFunctionBegin;
2382   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 
2387 /* -------------------------------------------------------------------*/
2388 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2389        MatGetRow_SeqBAIJ,
2390        MatRestoreRow_SeqBAIJ,
2391        MatMult_SeqBAIJ_N,
2392 /* 4*/ MatMultAdd_SeqBAIJ_N,
2393        MatMultTranspose_SeqBAIJ,
2394        MatMultTransposeAdd_SeqBAIJ,
2395        MatSolve_SeqBAIJ_N,
2396        0,
2397        0,
2398 /*10*/ 0,
2399        MatLUFactor_SeqBAIJ,
2400        0,
2401        0,
2402        MatTranspose_SeqBAIJ,
2403 /*15*/ MatGetInfo_SeqBAIJ,
2404        MatEqual_SeqBAIJ,
2405        MatGetDiagonal_SeqBAIJ,
2406        MatDiagonalScale_SeqBAIJ,
2407        MatNorm_SeqBAIJ,
2408 /*20*/ 0,
2409        MatAssemblyEnd_SeqBAIJ,
2410        0,
2411        MatSetOption_SeqBAIJ,
2412        MatZeroEntries_SeqBAIJ,
2413 /*25*/ MatZeroRows_SeqBAIJ,
2414        MatLUFactorSymbolic_SeqBAIJ,
2415        MatLUFactorNumeric_SeqBAIJ_N,
2416        MatCholeskyFactorSymbolic_SeqBAIJ,
2417        MatCholeskyFactorNumeric_SeqBAIJ_N,
2418 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2419        MatILUFactorSymbolic_SeqBAIJ,
2420        MatICCFactorSymbolic_SeqBAIJ,
2421        MatGetArray_SeqBAIJ,
2422        MatRestoreArray_SeqBAIJ,
2423 /*35*/ MatDuplicate_SeqBAIJ,
2424        0,
2425        0,
2426        MatILUFactor_SeqBAIJ,
2427        0,
2428 /*40*/ MatAXPY_SeqBAIJ,
2429        MatGetSubMatrices_SeqBAIJ,
2430        MatIncreaseOverlap_SeqBAIJ,
2431        MatGetValues_SeqBAIJ,
2432        MatCopy_SeqBAIJ,
2433 /*45*/ 0,
2434        MatScale_SeqBAIJ,
2435        0,
2436        0,
2437        0,
2438 /*50*/ 0,
2439        MatGetRowIJ_SeqBAIJ,
2440        MatRestoreRowIJ_SeqBAIJ,
2441        0,
2442        0,
2443 /*55*/ 0,
2444        0,
2445        0,
2446        0,
2447        MatSetValuesBlocked_SeqBAIJ,
2448 /*60*/ MatGetSubMatrix_SeqBAIJ,
2449        MatDestroy_SeqBAIJ,
2450        MatView_SeqBAIJ,
2451        0,
2452        0,
2453 /*65*/ 0,
2454        0,
2455        0,
2456        0,
2457        0,
2458 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2459        MatConvert_Basic,
2460        0,
2461        0,
2462        0,
2463 /*75*/ 0,
2464        0,
2465        0,
2466        0,
2467        0,
2468 /*80*/ 0,
2469        0,
2470        0,
2471        0,
2472        MatLoad_SeqBAIJ,
2473 /*85*/ 0,
2474        0,
2475        0,
2476        0,
2477        0,
2478 /*90*/ 0,
2479        0,
2480        0,
2481        0,
2482        0,
2483 /*95*/ 0,
2484        0,
2485        0,
2486        0,
2487        0,
2488 /*100*/0,
2489        0,
2490        0,
2491        0,
2492        0,
2493 /*105*/0,
2494        MatRealPart_SeqBAIJ,
2495        MatImaginaryPart_SeqBAIJ,
2496        0,
2497        0,
2498 /*110*/0,
2499        0,
2500        0,
2501        0,
2502        MatMissingDiagonal_SeqBAIJ
2503 /*115*/
2504 };
2505 
2506 EXTERN_C_BEGIN
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2509 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2510 {
2511   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2512   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2513   PetscErrorCode ierr;
2514 
2515   PetscFunctionBegin;
2516   if (aij->nonew != 1) {
2517     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2518   }
2519 
2520   /* allocate space for values if not already there */
2521   if (!aij->saved_values) {
2522     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2523   }
2524 
2525   /* copy values over */
2526   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 EXTERN_C_END
2530 
2531 EXTERN_C_BEGIN
2532 #undef __FUNCT__
2533 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2534 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2535 {
2536   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2537   PetscErrorCode ierr;
2538   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2539 
2540   PetscFunctionBegin;
2541   if (aij->nonew != 1) {
2542     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2543   }
2544   if (!aij->saved_values) {
2545     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2546   }
2547 
2548   /* copy values over */
2549   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2550   PetscFunctionReturn(0);
2551 }
2552 EXTERN_C_END
2553 
2554 EXTERN_C_BEGIN
2555 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2556 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2557 EXTERN_C_END
2558 
2559 EXTERN_C_BEGIN
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2562 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2563 {
2564   Mat_SeqBAIJ    *b;
2565   PetscErrorCode ierr;
2566   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2567   PetscTruth     flg,skipallocation = PETSC_FALSE;
2568 
2569   PetscFunctionBegin;
2570 
2571   if (nz == MAT_SKIP_ALLOCATION) {
2572     skipallocation = PETSC_TRUE;
2573     nz             = 0;
2574   }
2575 
2576   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2577     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2578   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2579 
2580   if (nnz && newbs != bs) {
2581     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2582   }
2583   bs   = newbs;
2584 
2585   B->rmap.bs = B->cmap.bs = bs;
2586   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2587   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2588 
2589   B->preallocated = PETSC_TRUE;
2590 
2591   mbs  = B->rmap.n/bs;
2592   nbs  = B->cmap.n/bs;
2593   bs2  = bs*bs;
2594 
2595   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2596     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2597   }
2598 
2599   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2600   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2601   if (nnz) {
2602     for (i=0; i<mbs; i++) {
2603       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2604       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2605     }
2606   }
2607 
2608   b       = (Mat_SeqBAIJ*)B->data;
2609   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2610     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2611   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2612 
2613   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2614   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2615   if (!flg) {
2616     switch (bs) {
2617     case 1:
2618       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2619       B->ops->mult            = MatMult_SeqBAIJ_1;
2620       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2621       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2622       break;
2623     case 2:
2624       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2625       B->ops->mult            = MatMult_SeqBAIJ_2;
2626       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2627       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2628       break;
2629     case 3:
2630       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2631       B->ops->mult            = MatMult_SeqBAIJ_3;
2632       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2633       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2634       break;
2635     case 4:
2636       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2637       B->ops->mult            = MatMult_SeqBAIJ_4;
2638       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2639       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2640       break;
2641     case 5:
2642       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2643       B->ops->mult            = MatMult_SeqBAIJ_5;
2644       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2645       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2646       break;
2647     case 6:
2648       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2649       B->ops->mult            = MatMult_SeqBAIJ_6;
2650       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2651       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2652       break;
2653     case 7:
2654       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2655       B->ops->mult            = MatMult_SeqBAIJ_7;
2656       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2657       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2658       break;
2659     default:
2660       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2661       B->ops->mult            = MatMult_SeqBAIJ_N;
2662       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2663       break;
2664     }
2665   }
2666   B->rmap.bs      = bs;
2667   b->mbs     = mbs;
2668   b->nbs     = nbs;
2669   if (!skipallocation) {
2670     if (!b->imax) {
2671       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2672     }
2673     /* b->ilen will count nonzeros in each block row so far. */
2674     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2675     if (!nnz) {
2676       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2677       else if (nz <= 0)        nz = 1;
2678       for (i=0; i<mbs; i++) b->imax[i] = nz;
2679       nz = nz*mbs;
2680     } else {
2681       nz = 0;
2682       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2683     }
2684 
2685     /* allocate the matrix space */
2686     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2687     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2688     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2689     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2690     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2691     b->singlemalloc = PETSC_TRUE;
2692     b->i[0] = 0;
2693     for (i=1; i<mbs+1; i++) {
2694       b->i[i] = b->i[i-1] + b->imax[i-1];
2695     }
2696     b->free_a     = PETSC_TRUE;
2697     b->free_ij    = PETSC_TRUE;
2698   } else {
2699     b->free_a     = PETSC_FALSE;
2700     b->free_ij    = PETSC_FALSE;
2701   }
2702 
2703   B->rmap.bs          = bs;
2704   b->bs2              = bs2;
2705   b->mbs              = mbs;
2706   b->nz               = 0;
2707   b->maxnz            = nz*bs2;
2708   B->info.nz_unneeded = (PetscReal)b->maxnz;
2709   PetscFunctionReturn(0);
2710 }
2711 EXTERN_C_END
2712 
2713 EXTERN_C_BEGIN
2714 #undef __FUNCT__
2715 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2716 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2717 {
2718   PetscInt       i,m,nz,nz_max=0,*nnz;
2719   PetscScalar    *values=0;
2720   PetscErrorCode ierr;
2721 
2722   PetscFunctionBegin;
2723 
2724   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2725   B->rmap.bs = bs;
2726   B->cmap.bs = bs;
2727   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2728   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2729   m = B->rmap.n/bs;
2730 
2731   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2732   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2733   for(i=0; i<m; i++) {
2734     nz = ii[i+1]- ii[i];
2735     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2736     nz_max = PetscMax(nz_max, nz);
2737     nnz[i] = nz;
2738   }
2739   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2740   ierr = PetscFree(nnz);CHKERRQ(ierr);
2741 
2742   values = (PetscScalar*)V;
2743   if (!values) {
2744     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2745     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2746   }
2747   for (i=0; i<m; i++) {
2748     PetscInt          ncols  = ii[i+1] - ii[i];
2749     const PetscInt    *icols = jj + ii[i];
2750     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2751     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2752   }
2753   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2754   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2755   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2756 
2757   PetscFunctionReturn(0);
2758 }
2759 EXTERN_C_END
2760 
2761 
2762 EXTERN_C_BEGIN
2763 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2764 EXTERN_C_END
2765 
2766 /*MC
2767    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2768    block sparse compressed row format.
2769 
2770    Options Database Keys:
2771 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2772 
2773   Level: beginner
2774 
2775 .seealso: MatCreateSeqBAIJ()
2776 M*/
2777 
2778 
2779 EXTERN_C_BEGIN
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatCreate_SeqBAIJ"
2782 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2783 {
2784   PetscErrorCode ierr;
2785   PetscMPIInt    size;
2786   Mat_SeqBAIJ    *b;
2787 
2788   PetscFunctionBegin;
2789   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2790   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2791 
2792   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2793   B->data = (void*)b;
2794   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2795   B->mapping          = 0;
2796   b->row              = 0;
2797   b->col              = 0;
2798   b->icol             = 0;
2799   b->reallocs         = 0;
2800   b->saved_values     = 0;
2801 
2802   b->roworiented      = PETSC_TRUE;
2803   b->nonew            = 0;
2804   b->diag             = 0;
2805   b->solve_work       = 0;
2806   b->mult_work        = 0;
2807   B->spptr            = 0;
2808   B->info.nz_unneeded = (PetscReal)b->maxnz;
2809   b->keepzeroedrows   = PETSC_FALSE;
2810   b->xtoy              = 0;
2811   b->XtoY              = 0;
2812   b->compressedrow.use     = PETSC_FALSE;
2813   b->compressedrow.nrows   = 0;
2814   b->compressedrow.i       = PETSC_NULL;
2815   b->compressedrow.rindex  = PETSC_NULL;
2816   b->compressedrow.checked = PETSC_FALSE;
2817   B->same_nonzero          = PETSC_FALSE;
2818 
2819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2820                                      "MatGetFactor_seqbaij_petsc",
2821                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2823                                      "MatInvertBlockDiagonal_SeqBAIJ",
2824                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2826                                      "MatStoreValues_SeqBAIJ",
2827                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2829                                      "MatRetrieveValues_SeqBAIJ",
2830                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2832                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2833                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2835                                      "MatConvert_SeqBAIJ_SeqAIJ",
2836                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2838                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2839                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2841                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2842                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2844                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2845                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2846   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2847   PetscFunctionReturn(0);
2848 }
2849 EXTERN_C_END
2850 
2851 #undef __FUNCT__
2852 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2853 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2854 {
2855   Mat            C = *B;
2856   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2857   PetscErrorCode ierr;
2858   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2859 
2860   PetscFunctionBegin;
2861   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2862 
2863   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2864   for (i=0; i<mbs; i++) {
2865     c->imax[i] = a->imax[i];
2866     c->ilen[i] = a->ilen[i];
2867   }
2868 
2869   /* allocate the matrix space */
2870   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2871   c->singlemalloc = PETSC_TRUE;
2872   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2873   if (mbs > 0) {
2874     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2875     if (cpvalues == MAT_COPY_VALUES) {
2876       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2877     } else {
2878       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2879     }
2880   }
2881   c->roworiented = a->roworiented;
2882   c->nonew       = a->nonew;
2883   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->rmap,&C->rmap);CHKERRQ(ierr);
2884   ierr = PetscMapCopy(((PetscObject)A)->comm,&A->cmap,&C->cmap);CHKERRQ(ierr);
2885   c->bs2         = a->bs2;
2886   c->mbs         = a->mbs;
2887   c->nbs         = a->nbs;
2888 
2889   if (a->diag) {
2890     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2891     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2892     for (i=0; i<mbs; i++) {
2893       c->diag[i] = a->diag[i];
2894     }
2895   } else c->diag        = 0;
2896   c->nz                 = a->nz;
2897   c->maxnz              = a->maxnz;
2898   c->solve_work         = 0;
2899   c->mult_work          = 0;
2900   c->free_a             = PETSC_TRUE;
2901   c->free_ij            = PETSC_TRUE;
2902   C->preallocated       = PETSC_TRUE;
2903   C->assembled          = PETSC_TRUE;
2904 
2905   c->compressedrow.use     = a->compressedrow.use;
2906   c->compressedrow.nrows   = a->compressedrow.nrows;
2907   c->compressedrow.checked = a->compressedrow.checked;
2908   if ( a->compressedrow.checked && a->compressedrow.use){
2909     i = a->compressedrow.nrows;
2910     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2911     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2912     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2913     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2914   } else {
2915     c->compressedrow.use    = PETSC_FALSE;
2916     c->compressedrow.i      = PETSC_NULL;
2917     c->compressedrow.rindex = PETSC_NULL;
2918   }
2919   C->same_nonzero = A->same_nonzero;
2920   *B = C;
2921   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2922   PetscFunctionReturn(0);
2923 }
2924 
2925 #undef __FUNCT__
2926 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2927 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2928 {
2929     PetscErrorCode ierr;
2930 
2931   PetscFunctionBegin;
2932   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2933   ierr = MatSetSizes(*B,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2934   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2935   ierr = MatDuplicateNoCreate_SeqBAIJ(A,cpvalues,B);
2936   PetscFunctionReturn(0);
2937 }
2938 
2939 #undef __FUNCT__
2940 #define __FUNCT__ "MatLoad_SeqBAIJ"
2941 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2942 {
2943   Mat_SeqBAIJ    *a;
2944   Mat            B;
2945   PetscErrorCode ierr;
2946   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2947   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2948   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2949   PetscInt       *masked,nmask,tmp,bs2,ishift;
2950   PetscMPIInt    size;
2951   int            fd;
2952   PetscScalar    *aa;
2953   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2954 
2955   PetscFunctionBegin;
2956   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2957     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2958   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2959   bs2  = bs*bs;
2960 
2961   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2962   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2963   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2964   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2965   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2966   M = header[1]; N = header[2]; nz = header[3];
2967 
2968   if (header[3] < 0) {
2969     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2970   }
2971 
2972   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2973 
2974   /*
2975      This code adds extra rows to make sure the number of rows is
2976     divisible by the blocksize
2977   */
2978   mbs        = M/bs;
2979   extra_rows = bs - M + bs*(mbs);
2980   if (extra_rows == bs) extra_rows = 0;
2981   else                  mbs++;
2982   if (extra_rows) {
2983     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2984   }
2985 
2986   /* read in row lengths */
2987   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2988   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2989   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2990 
2991   /* read in column indices */
2992   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2993   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2994   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2995 
2996   /* loop over row lengths determining block row lengths */
2997   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2998   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2999   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3000   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3001   masked   = mask + mbs;
3002   rowcount = 0; nzcount = 0;
3003   for (i=0; i<mbs; i++) {
3004     nmask = 0;
3005     for (j=0; j<bs; j++) {
3006       kmax = rowlengths[rowcount];
3007       for (k=0; k<kmax; k++) {
3008         tmp = jj[nzcount++]/bs;
3009         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3010       }
3011       rowcount++;
3012     }
3013     browlengths[i] += nmask;
3014     /* zero out the mask elements we set */
3015     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3016   }
3017 
3018   /* create our matrix */
3019   ierr = MatCreate(comm,&B);
3020   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3021   ierr = MatSetType(B,type);CHKERRQ(ierr);
3022   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3023   a = (Mat_SeqBAIJ*)B->data;
3024 
3025   /* set matrix "i" values */
3026   a->i[0] = 0;
3027   for (i=1; i<= mbs; i++) {
3028     a->i[i]      = a->i[i-1] + browlengths[i-1];
3029     a->ilen[i-1] = browlengths[i-1];
3030   }
3031   a->nz         = 0;
3032   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3033 
3034   /* read in nonzero values */
3035   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3036   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3037   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3038 
3039   /* set "a" and "j" values into matrix */
3040   nzcount = 0; jcount = 0;
3041   for (i=0; i<mbs; i++) {
3042     nzcountb = nzcount;
3043     nmask    = 0;
3044     for (j=0; j<bs; j++) {
3045       kmax = rowlengths[i*bs+j];
3046       for (k=0; k<kmax; k++) {
3047         tmp = jj[nzcount++]/bs;
3048 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3049       }
3050     }
3051     /* sort the masked values */
3052     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3053 
3054     /* set "j" values into matrix */
3055     maskcount = 1;
3056     for (j=0; j<nmask; j++) {
3057       a->j[jcount++]  = masked[j];
3058       mask[masked[j]] = maskcount++;
3059     }
3060     /* set "a" values into matrix */
3061     ishift = bs2*a->i[i];
3062     for (j=0; j<bs; j++) {
3063       kmax = rowlengths[i*bs+j];
3064       for (k=0; k<kmax; k++) {
3065         tmp       = jj[nzcountb]/bs ;
3066         block     = mask[tmp] - 1;
3067         point     = jj[nzcountb] - bs*tmp;
3068         idx       = ishift + bs2*block + j + bs*point;
3069         a->a[idx] = (MatScalar)aa[nzcountb++];
3070       }
3071     }
3072     /* zero out the mask elements we set */
3073     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3074   }
3075   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3076 
3077   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3078   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3079   ierr = PetscFree(aa);CHKERRQ(ierr);
3080   ierr = PetscFree(jj);CHKERRQ(ierr);
3081   ierr = PetscFree(mask);CHKERRQ(ierr);
3082 
3083   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3084   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3085   ierr = MatView_Private(B);CHKERRQ(ierr);
3086 
3087   *A = B;
3088   PetscFunctionReturn(0);
3089 }
3090 
3091 #undef __FUNCT__
3092 #define __FUNCT__ "MatCreateSeqBAIJ"
3093 /*@C
3094    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3095    compressed row) format.  For good matrix assembly performance the
3096    user should preallocate the matrix storage by setting the parameter nz
3097    (or the array nnz).  By setting these parameters accurately, performance
3098    during matrix assembly can be increased by more than a factor of 50.
3099 
3100    Collective on MPI_Comm
3101 
3102    Input Parameters:
3103 +  comm - MPI communicator, set to PETSC_COMM_SELF
3104 .  bs - size of block
3105 .  m - number of rows
3106 .  n - number of columns
3107 .  nz - number of nonzero blocks  per block row (same for all rows)
3108 -  nnz - array containing the number of nonzero blocks in the various block rows
3109          (possibly different for each block row) or PETSC_NULL
3110 
3111    Output Parameter:
3112 .  A - the matrix
3113 
3114    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3115    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
3116    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
3117    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3118 
3119    Options Database Keys:
3120 .   -mat_no_unroll - uses code that does not unroll the loops in the
3121                      block calculations (much slower)
3122 .    -mat_block_size - size of the blocks to use
3123 
3124    Level: intermediate
3125 
3126    Notes:
3127    The number of rows and columns must be divisible by blocksize.
3128 
3129    If the nnz parameter is given then the nz parameter is ignored
3130 
3131    A nonzero block is any block that as 1 or more nonzeros in it
3132 
3133    The block AIJ format is fully compatible with standard Fortran 77
3134    storage.  That is, the stored row and column indices can begin at
3135    either one (as in Fortran) or zero.  See the users' manual for details.
3136 
3137    Specify the preallocated storage with either nz or nnz (not both).
3138    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3139    allocation.  For additional details, see the users manual chapter on
3140    matrices.
3141 
3142 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3143 @*/
3144 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3145 {
3146   PetscErrorCode ierr;
3147 
3148   PetscFunctionBegin;
3149   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3150   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3151   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3152   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3158 /*@C
3159    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3160    per row in the matrix. For good matrix assembly performance the
3161    user should preallocate the matrix storage by setting the parameter nz
3162    (or the array nnz).  By setting these parameters accurately, performance
3163    during matrix assembly can be increased by more than a factor of 50.
3164 
3165    Collective on MPI_Comm
3166 
3167    Input Parameters:
3168 +  A - the matrix
3169 .  bs - size of block
3170 .  nz - number of block nonzeros per block row (same for all rows)
3171 -  nnz - array containing the number of block nonzeros in the various block rows
3172          (possibly different for each block row) or PETSC_NULL
3173 
3174    Options Database Keys:
3175 .   -mat_no_unroll - uses code that does not unroll the loops in the
3176                      block calculations (much slower)
3177 .    -mat_block_size - size of the blocks to use
3178 
3179    Level: intermediate
3180 
3181    Notes:
3182    If the nnz parameter is given then the nz parameter is ignored
3183 
3184    You can call MatGetInfo() to get information on how effective the preallocation was;
3185    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3186    You can also run with the option -info and look for messages with the string
3187    malloc in them to see if additional memory allocation was needed.
3188 
3189    The block AIJ format is fully compatible with standard Fortran 77
3190    storage.  That is, the stored row and column indices can begin at
3191    either one (as in Fortran) or zero.  See the users' manual for details.
3192 
3193    Specify the preallocated storage with either nz or nnz (not both).
3194    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3195    allocation.  For additional details, see the users manual chapter on
3196    matrices.
3197 
3198 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3199 @*/
3200 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3201 {
3202   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3203 
3204   PetscFunctionBegin;
3205   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3206   if (f) {
3207     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3208   }
3209   PetscFunctionReturn(0);
3210 }
3211 
3212 #undef __FUNCT__
3213 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3214 /*@C
3215    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3216    (the default sequential PETSc format).
3217 
3218    Collective on MPI_Comm
3219 
3220    Input Parameters:
3221 +  A - the matrix
3222 .  i - the indices into j for the start of each local row (starts with zero)
3223 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3224 -  v - optional values in the matrix
3225 
3226    Level: developer
3227 
3228 .keywords: matrix, aij, compressed row, sparse
3229 
3230 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3231 @*/
3232 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3233 {
3234   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3235 
3236   PetscFunctionBegin;
3237   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3238   if (f) {
3239     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3240   }
3241   PetscFunctionReturn(0);
3242 }
3243 
3244 
3245 #undef __FUNCT__
3246 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3247 /*@
3248      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3249               (upper triangular entries in CSR format) provided by the user.
3250 
3251      Collective on MPI_Comm
3252 
3253    Input Parameters:
3254 +  comm - must be an MPI communicator of size 1
3255 .  bs - size of block
3256 .  m - number of rows
3257 .  n - number of columns
3258 .  i - row indices
3259 .  j - column indices
3260 -  a - matrix values
3261 
3262    Output Parameter:
3263 .  mat - the matrix
3264 
3265    Level: intermediate
3266 
3267    Notes:
3268        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3269     once the matrix is destroyed
3270 
3271        You cannot set new nonzero locations into this matrix, that will generate an error.
3272 
3273        The i and j indices are 0 based
3274 
3275 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3276 
3277 @*/
3278 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3279 {
3280   PetscErrorCode ierr;
3281   PetscInt       ii;
3282   Mat_SeqBAIJ    *baij;
3283 
3284   PetscFunctionBegin;
3285   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3286   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3287 
3288   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3289   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3290   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3291   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3292   baij = (Mat_SeqBAIJ*)(*mat)->data;
3293   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3294 
3295   baij->i = i;
3296   baij->j = j;
3297   baij->a = a;
3298   baij->singlemalloc = PETSC_FALSE;
3299   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3300   baij->free_a       = PETSC_FALSE;
3301   baij->free_ij       = PETSC_FALSE;
3302 
3303   for (ii=0; ii<m; ii++) {
3304     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3305 #if defined(PETSC_USE_DEBUG)
3306     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3307 #endif
3308   }
3309 #if defined(PETSC_USE_DEBUG)
3310   for (ii=0; ii<baij->i[m]; ii++) {
3311     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3312     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3313   }
3314 #endif
3315 
3316   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3317   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3318   PetscFunctionReturn(0);
3319 }
3320