xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fd705b320d8d44969be9ca25a36dbdd35fbe8e12)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
233   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
248       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
250       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
251       i2     = 3;
252       idiag += 9;
253       for (i=1; i<m; i++) {
254 	v     = aa + 9*ai[i];
255 	vi    = aj + ai[i];
256 	nz    = diag[i] - ai[i];
257 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
258 	while (nz--) {
259 	  idx  = 3*(*vi++);
260 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
261 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
262 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
263 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
264 	  v   += 9;
265 	}
266 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
267 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
268 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
269         idiag   += 9;
270         i2      += 3;
271       }
272       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
273       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
274     }
275     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
276         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
277       i2    = 0;
278       mdiag = a->idiag+9*a->mbs;
279       for (i=0; i<m; i++) {
280         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
281         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
282         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
283         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
284         mdiag  += 9;
285         i2     += 3;
286       }
287       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
288     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
289       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
290     }
291     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
292       idiag   = a->idiag+9*a->mbs - 9;
293       i2      = 3*m - 3;
294       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
295       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
296       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
297       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
298       idiag -= 9;
299       i2    -= 3;
300       for (i=m-2; i>=0; i--) {
301 	v     = aa + 9*(diag[i]+1);
302 	vi    = aj + diag[i] + 1;
303 	nz    = ai[i+1] - diag[i] - 1;
304 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
305 	while (nz--) {
306 	  idx  = 3*(*vi++);
307 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
308 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
309 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
310 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
311 	  v   += 9;
312 	}
313 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
314 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
315 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
316         idiag   -= 9;
317         i2      -= 3;
318       }
319       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
320     }
321   } else {
322     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
323   }
324   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
325   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
326   PetscFunctionReturn(0);
327 }
328 
329 #undef __FUNCT__
330 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
331 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
332 {
333   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
334   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
335   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
336   PetscErrorCode     ierr;
337   PetscInt           m = a->mbs,i,i2,nz,idx;
338   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
339 
340   PetscFunctionBegin;
341   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
342   its = its*lits;
343   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
344   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
345   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
346   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
347   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
348 
349   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
350 
351   diag  = a->diag;
352   idiag = a->idiag;
353   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
354   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
355 
356   if (flag & SOR_ZERO_INITIAL_GUESS) {
357     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
358       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
359       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
360       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
361       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
362       i2     = 4;
363       idiag += 16;
364       for (i=1; i<m; i++) {
365 	v     = aa + 16*ai[i];
366 	vi    = aj + ai[i];
367 	nz    = diag[i] - ai[i];
368 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
369 	while (nz--) {
370 	  idx  = 4*(*vi++);
371 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
372 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
373 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
374 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
375 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
376 	  v   += 16;
377 	}
378 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
379 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
380 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
381 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
382         idiag   += 16;
383         i2      += 4;
384       }
385       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
386       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
387     }
388     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
389         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
390       i2    = 0;
391       mdiag = a->idiag+16*a->mbs;
392       for (i=0; i<m; i++) {
393         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
394         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
395         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
396         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
397         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
398         mdiag  += 16;
399         i2     += 4;
400       }
401       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
402     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
403       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
404     }
405     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
406       idiag   = a->idiag+16*a->mbs - 16;
407       i2      = 4*m - 4;
408       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
409       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
410       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
411       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
412       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
413       idiag -= 16;
414       i2    -= 4;
415       for (i=m-2; i>=0; i--) {
416 	v     = aa + 16*(diag[i]+1);
417 	vi    = aj + diag[i] + 1;
418 	nz    = ai[i+1] - diag[i] - 1;
419 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
420 	while (nz--) {
421 	  idx  = 4*(*vi++);
422 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
423 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
424 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
425 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
426 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
427 	  v   += 16;
428 	}
429 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
430 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
431 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
432 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
433         idiag   -= 16;
434         i2      -= 4;
435       }
436       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
437     }
438   } else {
439     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
440   }
441   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
442   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 #undef __FUNCT__
447 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
448 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
449 {
450   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
451   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
452   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
453   PetscErrorCode     ierr;
454   PetscInt           m = a->mbs,i,i2,nz,idx;
455   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
456 
457   PetscFunctionBegin;
458   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
459   its = its*lits;
460   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
461   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
462   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
463   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
464   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
465 
466   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
467 
468   diag  = a->diag;
469   idiag = a->idiag;
470   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
471   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
472 
473   if (flag & SOR_ZERO_INITIAL_GUESS) {
474     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
475       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
476       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
477       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
478       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
479       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
480       i2     = 5;
481       idiag += 25;
482       for (i=1; i<m; i++) {
483 	v     = aa + 25*ai[i];
484 	vi    = aj + ai[i];
485 	nz    = diag[i] - ai[i];
486 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
487 	while (nz--) {
488 	  idx  = 5*(*vi++);
489 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
490 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
491 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
492 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
493 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
494 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
495 	  v   += 25;
496 	}
497 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
498 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
499 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
500 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
501 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
502         idiag   += 25;
503         i2      += 5;
504       }
505       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
506       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
507     }
508     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
509         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
510       i2    = 0;
511       mdiag = a->idiag+25*a->mbs;
512       for (i=0; i<m; i++) {
513         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
514         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
515         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
516         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
517         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
518         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
519         mdiag  += 25;
520         i2     += 5;
521       }
522       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
523     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
524       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
525     }
526     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
527       idiag   = a->idiag+25*a->mbs - 25;
528       i2      = 5*m - 5;
529       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
530       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
531       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
532       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
533       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
534       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
535       idiag -= 25;
536       i2    -= 5;
537       for (i=m-2; i>=0; i--) {
538 	v     = aa + 25*(diag[i]+1);
539 	vi    = aj + diag[i] + 1;
540 	nz    = ai[i+1] - diag[i] - 1;
541 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
542 	while (nz--) {
543 	  idx  = 5*(*vi++);
544 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
545 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
546 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
547 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
548 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
549 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
550 	  v   += 25;
551 	}
552 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
553 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
554 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
555 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
556 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
557         idiag   -= 25;
558         i2      -= 5;
559       }
560       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
561     }
562   } else {
563     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
564   }
565   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
566   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
567   PetscFunctionReturn(0);
568 }
569 
570 /*
571     Special version for direct calls from Fortran (Used in PETSc-fun3d)
572 */
573 #if defined(PETSC_HAVE_FORTRAN_CAPS)
574 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
575 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
576 #define matsetvaluesblocked4_ matsetvaluesblocked4
577 #endif
578 
579 EXTERN_C_BEGIN
580 #undef __FUNCT__
581 #define __FUNCT__ "matsetvaluesblocked4_"
582 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
583 {
584   Mat               A = *AA;
585   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
586   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
587   PetscInt          *ai=a->i,*ailen=a->ilen;
588   PetscInt          *aj=a->j,stepval,lastcol = -1;
589   const PetscScalar *value = v;
590   MatScalar         *ap,*aa = a->a,*bap;
591 
592   PetscFunctionBegin;
593   if (A->rmap.bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
594   stepval = (n-1)*4;
595   for (k=0; k<m; k++) { /* loop over added rows */
596     row  = im[k];
597     rp   = aj + ai[row];
598     ap   = aa + 16*ai[row];
599     nrow = ailen[row];
600     low  = 0;
601     high = nrow;
602     for (l=0; l<n; l++) { /* loop over added columns */
603       col = in[l];
604       if (col <= lastcol) low = 0; else high = nrow;
605       lastcol = col;
606       value = v + k*(stepval+4 + l)*4;
607       while (high-low > 7) {
608         t = (low+high)/2;
609         if (rp[t] > col) high = t;
610         else             low  = t;
611       }
612       for (i=low; i<high; i++) {
613         if (rp[i] > col) break;
614         if (rp[i] == col) {
615           bap  = ap +  16*i;
616           for (ii=0; ii<4; ii++,value+=stepval) {
617             for (jj=ii; jj<16; jj+=4) {
618               bap[jj] += *value++;
619             }
620           }
621           goto noinsert2;
622         }
623       }
624       N = nrow++ - 1;
625       high++; /* added new column index thus must search to one higher than before */
626       /* shift up all the later entries in this row */
627       for (ii=N; ii>=i; ii--) {
628         rp[ii+1] = rp[ii];
629         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
630       }
631       if (N >= i) {
632         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
633       }
634       rp[i] = col;
635       bap   = ap +  16*i;
636       for (ii=0; ii<4; ii++,value+=stepval) {
637         for (jj=ii; jj<16; jj+=4) {
638           bap[jj] = *value++;
639         }
640       }
641       noinsert2:;
642       low = i;
643     }
644     ailen[row] = nrow;
645   }
646   PetscFunctionReturnVoid();
647 }
648 EXTERN_C_END
649 
650 #if defined(PETSC_HAVE_FORTRAN_CAPS)
651 #define matsetvalues4_ MATSETVALUES4
652 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
653 #define matsetvalues4_ matsetvalues4
654 #endif
655 
656 EXTERN_C_BEGIN
657 #undef __FUNCT__
658 #define __FUNCT__ "MatSetValues4_"
659 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
660 {
661   Mat         A = *AA;
662   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
663   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
664   PetscInt    *ai=a->i,*ailen=a->ilen;
665   PetscInt    *aj=a->j,brow,bcol;
666   PetscInt    ridx,cidx,lastcol = -1;
667   MatScalar   *ap,value,*aa=a->a,*bap;
668 
669   PetscFunctionBegin;
670   for (k=0; k<m; k++) { /* loop over added rows */
671     row  = im[k]; brow = row/4;
672     rp   = aj + ai[brow];
673     ap   = aa + 16*ai[brow];
674     nrow = ailen[brow];
675     low  = 0;
676     high = nrow;
677     for (l=0; l<n; l++) { /* loop over added columns */
678       col = in[l]; bcol = col/4;
679       ridx = row % 4; cidx = col % 4;
680       value = v[l + k*n];
681       if (col <= lastcol) low = 0; else high = nrow;
682       lastcol = col;
683       while (high-low > 7) {
684         t = (low+high)/2;
685         if (rp[t] > bcol) high = t;
686         else              low  = t;
687       }
688       for (i=low; i<high; i++) {
689         if (rp[i] > bcol) break;
690         if (rp[i] == bcol) {
691           bap  = ap +  16*i + 4*cidx + ridx;
692           *bap += value;
693           goto noinsert1;
694         }
695       }
696       N = nrow++ - 1;
697       high++; /* added new column thus must search to one higher than before */
698       /* shift up all the later entries in this row */
699       for (ii=N; ii>=i; ii--) {
700         rp[ii+1] = rp[ii];
701         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
702       }
703       if (N>=i) {
704         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
705       }
706       rp[i]                    = bcol;
707       ap[16*i + 4*cidx + ridx] = value;
708       noinsert1:;
709       low = i;
710     }
711     ailen[brow] = nrow;
712   }
713   PetscFunctionReturnVoid();
714 }
715 EXTERN_C_END
716 
717 /*  UGLY, ugly, ugly
718    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
719    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
720    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
721    converts the entries into single precision and then calls ..._MatScalar() to put them
722    into the single precision data structures.
723 */
724 #if defined(PETSC_USE_MAT_SINGLE)
725 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
726 #else
727 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
728 #endif
729 
730 #define CHUNKSIZE  10
731 
732 /*
733      Checks for missing diagonals
734 */
735 #undef __FUNCT__
736 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
737 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
738 {
739   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
740   PetscErrorCode ierr;
741   PetscInt       *diag,*jj = a->j,i;
742 
743   PetscFunctionBegin;
744   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
745   *missing = PETSC_FALSE;
746   diag     = a->diag;
747   for (i=0; i<a->mbs; i++) {
748     if (jj[diag[i]] != i) {
749       *missing  = PETSC_TRUE;
750       if (d) *d = i;
751     }
752   }
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
758 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
759 {
760   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
761   PetscErrorCode ierr;
762   PetscInt       i,j,m = a->mbs;
763 
764   PetscFunctionBegin;
765   if (!a->diag) {
766     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
767   }
768   for (i=0; i<m; i++) {
769     a->diag[i] = a->i[i+1];
770     for (j=a->i[i]; j<a->i[i+1]; j++) {
771       if (a->j[j] == i) {
772         a->diag[i] = j;
773         break;
774       }
775     }
776   }
777   PetscFunctionReturn(0);
778 }
779 
780 
781 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
785 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
786 {
787   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
788   PetscErrorCode ierr;
789   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap.bs,nbs = 1,k,l,cnt;
790   PetscInt       *tia, *tja;
791 
792   PetscFunctionBegin;
793   *nn = n;
794   if (!ia) PetscFunctionReturn(0);
795   if (symmetric) {
796     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
797   } else {
798     tia = a->i; tja = a->j;
799   }
800 
801   if (!blockcompressed && bs > 1) {
802     (*nn) *= bs;
803     nbs    = bs;
804     /* malloc & create the natural set of indices */
805     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
806     if (n) {
807       (*ia)[0] = 0;
808       for (j=1; j<bs; j++) {
809         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
810       }
811     }
812 
813     for (i=1; i<n; i++) {
814       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
815       for (j=1; j<bs; j++) {
816         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
817       }
818     }
819     if (n) {
820       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
821     }
822 
823     if (ja) {
824       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
825       cnt = 0;
826       for (i=0; i<n; i++) {
827         for (j=0; j<bs; j++) {
828           for (k=tia[i]; k<tia[i+1]; k++) {
829             for (l=0; l<bs; l++) {
830               (*ja)[cnt++] = bs*tja[k] + l;
831 	    }
832           }
833         }
834       }
835     }
836 
837     n     *= bs;
838     nz *= bs*bs;
839     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
840       ierr = PetscFree(tia);CHKERRQ(ierr);
841       ierr = PetscFree(tja);CHKERRQ(ierr);
842     }
843   } else {
844     *ia = tia;
845     if (ja) *ja = tja;
846   }
847   if (oshift == 1) {
848     for (i=0; i<n+nbs; i++) (*ia)[i]++;
849     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
856 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
857 {
858   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
859   PetscErrorCode ierr;
860   PetscInt       i,n = a->mbs,nz = a->i[n];
861 
862   PetscFunctionBegin;
863   if (!ia) PetscFunctionReturn(0);
864   if (!blockcompressed && A->rmap.bs > 1) {
865     ierr = PetscFree(*ia);CHKERRQ(ierr);
866     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
867   } else if (symmetric) {
868     ierr = PetscFree(*ia);CHKERRQ(ierr);
869     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
870   } else if (oshift == 1) { /* blockcompressed */
871     for (i=0; i<n+1; i++) a->i[i]--;
872     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
873   }
874   PetscFunctionReturn(0);
875 }
876 
877 #undef __FUNCT__
878 #define __FUNCT__ "MatDestroy_SeqBAIJ"
879 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
880 {
881   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
882   PetscErrorCode ierr;
883 
884   PetscFunctionBegin;
885 #if defined(PETSC_USE_LOG)
886   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
887 #endif
888   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
889   if (a->row) {
890     ierr = ISDestroy(a->row);CHKERRQ(ierr);
891   }
892   if (a->col) {
893     ierr = ISDestroy(a->col);CHKERRQ(ierr);
894   }
895   ierr = PetscFree(a->diag);CHKERRQ(ierr);
896   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
897   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
898   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
899   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
900   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
901   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
902 #if defined(PETSC_USE_MAT_SINGLE)
903   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
904 #endif
905   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
906   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
907 
908   ierr = PetscFree(a);CHKERRQ(ierr);
909 
910   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
911   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
912   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
913   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
918   PetscFunctionReturn(0);
919 }
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "MatSetOption_SeqBAIJ"
923 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
924 {
925   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
926   PetscErrorCode ierr;
927 
928   PetscFunctionBegin;
929   switch (op) {
930   case MAT_ROW_ORIENTED:
931     a->roworiented    = flg;
932     break;
933   case MAT_KEEP_ZEROED_ROWS:
934     a->keepzeroedrows = flg;
935     break;
936   case MAT_NEW_NONZERO_LOCATIONS:
937     a->nonew          = (flg ? 0 : 1);
938     break;
939   case MAT_NEW_NONZERO_LOCATION_ERR:
940     a->nonew          = (flg ? -1 : 0);
941     break;
942   case MAT_NEW_NONZERO_ALLOCATION_ERR:
943     a->nonew          = (flg ? -2 : 0);
944     break;
945   case MAT_NEW_DIAGONALS:
946   case MAT_IGNORE_OFF_PROC_ENTRIES:
947   case MAT_USE_HASH_TABLE:
948     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
949     break;
950   case MAT_SYMMETRIC:
951   case MAT_STRUCTURALLY_SYMMETRIC:
952   case MAT_HERMITIAN:
953   case MAT_SYMMETRY_ETERNAL:
954     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
955     break;
956   default:
957     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
958   }
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatGetRow_SeqBAIJ"
964 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
965 {
966   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
967   PetscErrorCode ierr;
968   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
969   MatScalar      *aa,*aa_i;
970   PetscScalar    *v_i;
971 
972   PetscFunctionBegin;
973   bs  = A->rmap.bs;
974   ai  = a->i;
975   aj  = a->j;
976   aa  = a->a;
977   bs2 = a->bs2;
978 
979   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
980 
981   bn  = row/bs;   /* Block number */
982   bp  = row % bs; /* Block Position */
983   M   = ai[bn+1] - ai[bn];
984   *nz = bs*M;
985 
986   if (v) {
987     *v = 0;
988     if (*nz) {
989       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
990       for (i=0; i<M; i++) { /* for each block in the block row */
991         v_i  = *v + i*bs;
992         aa_i = aa + bs2*(ai[bn] + i);
993         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
994       }
995     }
996   }
997 
998   if (idx) {
999     *idx = 0;
1000     if (*nz) {
1001       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1002       for (i=0; i<M; i++) { /* for each block in the block row */
1003         idx_i = *idx + i*bs;
1004         itmp  = bs*aj[ai[bn] + i];
1005         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1006       }
1007     }
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1014 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1015 {
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1020   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1026 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
1027 {
1028   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1029   Mat            C;
1030   PetscErrorCode ierr;
1031   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1032   PetscInt       *rows,*cols,bs2=a->bs2;
1033   PetscScalar    *array;
1034 
1035   PetscFunctionBegin;
1036   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1037   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1038   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1039 
1040 #if defined(PETSC_USE_MAT_SINGLE)
1041   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1042   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1043 #else
1044   array = a->a;
1045 #endif
1046 
1047   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1048   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1049   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1050   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1051   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1052   ierr = PetscFree(col);CHKERRQ(ierr);
1053   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1054   cols = rows + bs;
1055   for (i=0; i<mbs; i++) {
1056     cols[0] = i*bs;
1057     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1058     len = ai[i+1] - ai[i];
1059     for (j=0; j<len; j++) {
1060       rows[0] = (*aj++)*bs;
1061       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1062       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1063       array += bs2;
1064     }
1065   }
1066   ierr = PetscFree(rows);CHKERRQ(ierr);
1067 #if defined(PETSC_USE_MAT_SINGLE)
1068   ierr = PetscFree(array);
1069 #endif
1070 
1071   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1072   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1073 
1074   if (B) {
1075     *B = C;
1076   } else {
1077     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1078   }
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1084 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1085 {
1086   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1087   PetscErrorCode ierr;
1088   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1089   int            fd;
1090   PetscScalar    *aa;
1091   FILE           *file;
1092 
1093   PetscFunctionBegin;
1094   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1095   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1096   col_lens[0] = MAT_FILE_COOKIE;
1097 
1098   col_lens[1] = A->rmap.N;
1099   col_lens[2] = A->cmap.n;
1100   col_lens[3] = a->nz*bs2;
1101 
1102   /* store lengths of each row and write (including header) to file */
1103   count = 0;
1104   for (i=0; i<a->mbs; i++) {
1105     for (j=0; j<bs; j++) {
1106       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1107     }
1108   }
1109   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1110   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1111 
1112   /* store column indices (zero start index) */
1113   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1114   count = 0;
1115   for (i=0; i<a->mbs; i++) {
1116     for (j=0; j<bs; j++) {
1117       for (k=a->i[i]; k<a->i[i+1]; k++) {
1118         for (l=0; l<bs; l++) {
1119           jj[count++] = bs*a->j[k] + l;
1120         }
1121       }
1122     }
1123   }
1124   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1125   ierr = PetscFree(jj);CHKERRQ(ierr);
1126 
1127   /* store nonzero values */
1128   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1129   count = 0;
1130   for (i=0; i<a->mbs; i++) {
1131     for (j=0; j<bs; j++) {
1132       for (k=a->i[i]; k<a->i[i+1]; k++) {
1133         for (l=0; l<bs; l++) {
1134           aa[count++] = a->a[bs2*k + l*bs + j];
1135         }
1136       }
1137     }
1138   }
1139   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1140   ierr = PetscFree(aa);CHKERRQ(ierr);
1141 
1142   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1143   if (file) {
1144     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1145   }
1146   PetscFunctionReturn(0);
1147 }
1148 
1149 #undef __FUNCT__
1150 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1151 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1152 {
1153   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1154   PetscErrorCode    ierr;
1155   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1156   PetscViewerFormat format;
1157 
1158   PetscFunctionBegin;
1159   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1160   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1161     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1162   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1163     Mat aij;
1164     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1165     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1166     ierr = MatDestroy(aij);CHKERRQ(ierr);
1167   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1168      PetscFunctionReturn(0);
1169   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1170     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1171     for (i=0; i<a->mbs; i++) {
1172       for (j=0; j<bs; j++) {
1173         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1174         for (k=a->i[i]; k<a->i[i+1]; k++) {
1175           for (l=0; l<bs; l++) {
1176 #if defined(PETSC_USE_COMPLEX)
1177             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1178               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1179                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1180             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1181               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1182                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1183             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1184               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1185             }
1186 #else
1187             if (a->a[bs2*k + l*bs + j] != 0.0) {
1188               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1189             }
1190 #endif
1191           }
1192         }
1193         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1194       }
1195     }
1196     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1197   } else {
1198     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1199     for (i=0; i<a->mbs; i++) {
1200       for (j=0; j<bs; j++) {
1201         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1202         for (k=a->i[i]; k<a->i[i+1]; k++) {
1203           for (l=0; l<bs; l++) {
1204 #if defined(PETSC_USE_COMPLEX)
1205             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1206               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1207                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1208             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1209               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1210                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1211             } else {
1212               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1213             }
1214 #else
1215             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1216 #endif
1217           }
1218         }
1219         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1220       }
1221     }
1222     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1223   }
1224   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1230 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1231 {
1232   Mat            A = (Mat) Aa;
1233   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1234   PetscErrorCode ierr;
1235   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1236   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1237   MatScalar      *aa;
1238   PetscViewer    viewer;
1239 
1240   PetscFunctionBegin;
1241 
1242   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1243   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1244 
1245   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1246 
1247   /* loop over matrix elements drawing boxes */
1248   color = PETSC_DRAW_BLUE;
1249   for (i=0,row=0; i<mbs; i++,row+=bs) {
1250     for (j=a->i[i]; j<a->i[i+1]; j++) {
1251       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1252       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1253       aa = a->a + j*bs2;
1254       for (k=0; k<bs; k++) {
1255         for (l=0; l<bs; l++) {
1256           if (PetscRealPart(*aa++) >=  0.) continue;
1257           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1258         }
1259       }
1260     }
1261   }
1262   color = PETSC_DRAW_CYAN;
1263   for (i=0,row=0; i<mbs; i++,row+=bs) {
1264     for (j=a->i[i]; j<a->i[i+1]; j++) {
1265       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1266       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1267       aa = a->a + j*bs2;
1268       for (k=0; k<bs; k++) {
1269         for (l=0; l<bs; l++) {
1270           if (PetscRealPart(*aa++) != 0.) continue;
1271           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1272         }
1273       }
1274     }
1275   }
1276 
1277   color = PETSC_DRAW_RED;
1278   for (i=0,row=0; i<mbs; i++,row+=bs) {
1279     for (j=a->i[i]; j<a->i[i+1]; j++) {
1280       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1281       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1282       aa = a->a + j*bs2;
1283       for (k=0; k<bs; k++) {
1284         for (l=0; l<bs; l++) {
1285           if (PetscRealPart(*aa++) <= 0.) continue;
1286           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1287         }
1288       }
1289     }
1290   }
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1296 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1297 {
1298   PetscErrorCode ierr;
1299   PetscReal      xl,yl,xr,yr,w,h;
1300   PetscDraw      draw;
1301   PetscTruth     isnull;
1302 
1303   PetscFunctionBegin;
1304 
1305   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1306   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1307 
1308   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1309   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1310   xr += w;    yr += h;  xl = -w;     yl = -h;
1311   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1312   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1313   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatView_SeqBAIJ"
1319 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1320 {
1321   PetscErrorCode ierr;
1322   PetscTruth     iascii,isbinary,isdraw;
1323 
1324   PetscFunctionBegin;
1325   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1326   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1327   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1328   if (iascii){
1329     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1330   } else if (isbinary) {
1331     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1332   } else if (isdraw) {
1333     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1334   } else {
1335     Mat B;
1336     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1337     ierr = MatView(B,viewer);CHKERRQ(ierr);
1338     ierr = MatDestroy(B);CHKERRQ(ierr);
1339   }
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1346 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1347 {
1348   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1349   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1350   PetscInt    *ai = a->i,*ailen = a->ilen;
1351   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1352   MatScalar   *ap,*aa = a->a;
1353 
1354   PetscFunctionBegin;
1355   for (k=0; k<m; k++) { /* loop over rows */
1356     row  = im[k]; brow = row/bs;
1357     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1358     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1359     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1360     nrow = ailen[brow];
1361     for (l=0; l<n; l++) { /* loop over columns */
1362       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1363       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1364       col  = in[l] ;
1365       bcol = col/bs;
1366       cidx = col%bs;
1367       ridx = row%bs;
1368       high = nrow;
1369       low  = 0; /* assume unsorted */
1370       while (high-low > 5) {
1371         t = (low+high)/2;
1372         if (rp[t] > bcol) high = t;
1373         else             low  = t;
1374       }
1375       for (i=low; i<high; i++) {
1376         if (rp[i] > bcol) break;
1377         if (rp[i] == bcol) {
1378           *v++ = ap[bs2*i+bs*cidx+ridx];
1379           goto finished;
1380         }
1381       }
1382       *v++ = 0.0;
1383       finished:;
1384     }
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #if defined(PETSC_USE_MAT_SINGLE)
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1392 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1393 {
1394   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1395   PetscErrorCode ierr;
1396   PetscInt       i,N = m*n*b->bs2;
1397   MatScalar      *vsingle;
1398 
1399   PetscFunctionBegin;
1400   if (N > b->setvalueslen) {
1401     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1402     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1403     b->setvalueslen  = N;
1404   }
1405   vsingle = b->setvaluescopy;
1406   for (i=0; i<N; i++) {
1407     vsingle[i] = v[i];
1408   }
1409   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1410   PetscFunctionReturn(0);
1411 }
1412 #endif
1413 
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1417 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1418 {
1419   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1420   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1421   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1422   PetscErrorCode  ierr;
1423   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1424   PetscTruth      roworiented=a->roworiented;
1425   const MatScalar *value = v;
1426   MatScalar       *ap,*aa = a->a,*bap;
1427 
1428   PetscFunctionBegin;
1429   if (roworiented) {
1430     stepval = (n-1)*bs;
1431   } else {
1432     stepval = (m-1)*bs;
1433   }
1434   for (k=0; k<m; k++) { /* loop over added rows */
1435     row  = im[k];
1436     if (row < 0) continue;
1437 #if defined(PETSC_USE_DEBUG)
1438     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1439 #endif
1440     rp   = aj + ai[row];
1441     ap   = aa + bs2*ai[row];
1442     rmax = imax[row];
1443     nrow = ailen[row];
1444     low  = 0;
1445     high = nrow;
1446     for (l=0; l<n; l++) { /* loop over added columns */
1447       if (in[l] < 0) continue;
1448 #if defined(PETSC_USE_DEBUG)
1449       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1450 #endif
1451       col = in[l];
1452       if (roworiented) {
1453         value = v + k*(stepval+bs)*bs + l*bs;
1454       } else {
1455         value = v + l*(stepval+bs)*bs + k*bs;
1456       }
1457       if (col <= lastcol) low = 0; else high = nrow;
1458       lastcol = col;
1459       while (high-low > 7) {
1460         t = (low+high)/2;
1461         if (rp[t] > col) high = t;
1462         else             low  = t;
1463       }
1464       for (i=low; i<high; i++) {
1465         if (rp[i] > col) break;
1466         if (rp[i] == col) {
1467           bap  = ap +  bs2*i;
1468           if (roworiented) {
1469             if (is == ADD_VALUES) {
1470               for (ii=0; ii<bs; ii++,value+=stepval) {
1471                 for (jj=ii; jj<bs2; jj+=bs) {
1472                   bap[jj] += *value++;
1473                 }
1474               }
1475             } else {
1476               for (ii=0; ii<bs; ii++,value+=stepval) {
1477                 for (jj=ii; jj<bs2; jj+=bs) {
1478                   bap[jj] = *value++;
1479                 }
1480               }
1481             }
1482           } else {
1483             if (is == ADD_VALUES) {
1484               for (ii=0; ii<bs; ii++,value+=stepval) {
1485                 for (jj=0; jj<bs; jj++) {
1486                   *bap++ += *value++;
1487                 }
1488               }
1489             } else {
1490               for (ii=0; ii<bs; ii++,value+=stepval) {
1491                 for (jj=0; jj<bs; jj++) {
1492                   *bap++  = *value++;
1493                 }
1494               }
1495             }
1496           }
1497           goto noinsert2;
1498         }
1499       }
1500       if (nonew == 1) goto noinsert2;
1501       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1502       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1503       N = nrow++ - 1; high++;
1504       /* shift up all the later entries in this row */
1505       for (ii=N; ii>=i; ii--) {
1506         rp[ii+1] = rp[ii];
1507         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1508       }
1509       if (N >= i) {
1510         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1511       }
1512       rp[i] = col;
1513       bap   = ap +  bs2*i;
1514       if (roworiented) {
1515         for (ii=0; ii<bs; ii++,value+=stepval) {
1516           for (jj=ii; jj<bs2; jj+=bs) {
1517             bap[jj] = *value++;
1518           }
1519         }
1520       } else {
1521         for (ii=0; ii<bs; ii++,value+=stepval) {
1522           for (jj=0; jj<bs; jj++) {
1523             *bap++  = *value++;
1524           }
1525         }
1526       }
1527       noinsert2:;
1528       low = i;
1529     }
1530     ailen[row] = nrow;
1531   }
1532   PetscFunctionReturn(0);
1533 }
1534 
1535 #undef __FUNCT__
1536 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1537 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1538 {
1539   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1540   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1541   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1542   PetscErrorCode ierr;
1543   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1544   MatScalar      *aa = a->a,*ap;
1545   PetscReal      ratio=0.6;
1546 
1547   PetscFunctionBegin;
1548   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1549 
1550   if (m) rmax = ailen[0];
1551   for (i=1; i<mbs; i++) {
1552     /* move each row back by the amount of empty slots (fshift) before it*/
1553     fshift += imax[i-1] - ailen[i-1];
1554     rmax   = PetscMax(rmax,ailen[i]);
1555     if (fshift) {
1556       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1557       N = ailen[i];
1558       for (j=0; j<N; j++) {
1559         ip[j-fshift] = ip[j];
1560         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1561       }
1562     }
1563     ai[i] = ai[i-1] + ailen[i-1];
1564   }
1565   if (mbs) {
1566     fshift += imax[mbs-1] - ailen[mbs-1];
1567     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1568   }
1569   /* reset ilen and imax for each row */
1570   for (i=0; i<mbs; i++) {
1571     ailen[i] = imax[i] = ai[i+1] - ai[i];
1572   }
1573   a->nz = ai[mbs];
1574 
1575   /* diagonals may have moved, so kill the diagonal pointers */
1576   a->idiagvalid = PETSC_FALSE;
1577   if (fshift && a->diag) {
1578     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1579     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1580     a->diag = 0;
1581   }
1582   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1583   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1584   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1585   a->reallocs          = 0;
1586   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1587 
1588   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1589   if (a->compressedrow.use){
1590     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1591   }
1592 
1593   A->same_nonzero = PETSC_TRUE;
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 /*
1598    This function returns an array of flags which indicate the locations of contiguous
1599    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1600    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1601    Assume: sizes should be long enough to hold all the values.
1602 */
1603 #undef __FUNCT__
1604 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1605 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1606 {
1607   PetscInt   i,j,k,row;
1608   PetscTruth flg;
1609 
1610   PetscFunctionBegin;
1611   for (i=0,j=0; i<n; j++) {
1612     row = idx[i];
1613     if (row%bs!=0) { /* Not the begining of a block */
1614       sizes[j] = 1;
1615       i++;
1616     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1617       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1618       i++;
1619     } else { /* Begining of the block, so check if the complete block exists */
1620       flg = PETSC_TRUE;
1621       for (k=1; k<bs; k++) {
1622         if (row+k != idx[i+k]) { /* break in the block */
1623           flg = PETSC_FALSE;
1624           break;
1625         }
1626       }
1627       if (flg) { /* No break in the bs */
1628         sizes[j] = bs;
1629         i+= bs;
1630       } else {
1631         sizes[j] = 1;
1632         i++;
1633       }
1634     }
1635   }
1636   *bs_max = j;
1637   PetscFunctionReturn(0);
1638 }
1639 
1640 #undef __FUNCT__
1641 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1642 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1643 {
1644   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1645   PetscErrorCode ierr;
1646   PetscInt       i,j,k,count,*rows;
1647   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1648   PetscScalar    zero = 0.0;
1649   MatScalar      *aa;
1650 
1651   PetscFunctionBegin;
1652   /* Make a copy of the IS and  sort it */
1653   /* allocate memory for rows,sizes */
1654   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1655   sizes = rows + is_n;
1656 
1657   /* copy IS values to rows, and sort them */
1658   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1659   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1660   if (baij->keepzeroedrows) {
1661     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1662     bs_max = is_n;
1663     A->same_nonzero = PETSC_TRUE;
1664   } else {
1665     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1666     A->same_nonzero = PETSC_FALSE;
1667   }
1668 
1669   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1670     row   = rows[j];
1671     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1672     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1673     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
1674     if (sizes[i] == bs && !baij->keepzeroedrows) {
1675       if (diag != 0.0) {
1676         if (baij->ilen[row/bs] > 0) {
1677           baij->ilen[row/bs]       = 1;
1678           baij->j[baij->i[row/bs]] = row/bs;
1679           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1680         }
1681         /* Now insert all the diagonal values for this bs */
1682         for (k=0; k<bs; k++) {
1683           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1684         }
1685       } else { /* (diag == 0.0) */
1686         baij->ilen[row/bs] = 0;
1687       } /* end (diag == 0.0) */
1688     } else { /* (sizes[i] != bs) */
1689 #if defined (PETSC_USE_DEBUG)
1690       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1691 #endif
1692       for (k=0; k<count; k++) {
1693         aa[0] =  zero;
1694         aa    += bs;
1695       }
1696       if (diag != 0.0) {
1697         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1698       }
1699     }
1700   }
1701 
1702   ierr = PetscFree(rows);CHKERRQ(ierr);
1703   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1704   PetscFunctionReturn(0);
1705 }
1706 
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1709 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1710 {
1711   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1712   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1713   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1714   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1715   PetscErrorCode ierr;
1716   PetscInt       ridx,cidx,bs2=a->bs2;
1717   PetscTruth     roworiented=a->roworiented;
1718   MatScalar      *ap,value,*aa=a->a,*bap;
1719 
1720   PetscFunctionBegin;
1721   for (k=0; k<m; k++) { /* loop over added rows */
1722     row  = im[k];
1723     brow = row/bs;
1724     if (row < 0) continue;
1725 #if defined(PETSC_USE_DEBUG)
1726     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1727 #endif
1728     rp   = aj + ai[brow];
1729     ap   = aa + bs2*ai[brow];
1730     rmax = imax[brow];
1731     nrow = ailen[brow];
1732     low  = 0;
1733     high = nrow;
1734     for (l=0; l<n; l++) { /* loop over added columns */
1735       if (in[l] < 0) continue;
1736 #if defined(PETSC_USE_DEBUG)
1737       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1738 #endif
1739       col = in[l]; bcol = col/bs;
1740       ridx = row % bs; cidx = col % bs;
1741       if (roworiented) {
1742         value = v[l + k*n];
1743       } else {
1744         value = v[k + l*m];
1745       }
1746       if (col <= lastcol) low = 0; else high = nrow;
1747       lastcol = col;
1748       while (high-low > 7) {
1749         t = (low+high)/2;
1750         if (rp[t] > bcol) high = t;
1751         else              low  = t;
1752       }
1753       for (i=low; i<high; i++) {
1754         if (rp[i] > bcol) break;
1755         if (rp[i] == bcol) {
1756           bap  = ap +  bs2*i + bs*cidx + ridx;
1757           if (is == ADD_VALUES) *bap += value;
1758           else                  *bap  = value;
1759           goto noinsert1;
1760         }
1761       }
1762       if (nonew == 1) goto noinsert1;
1763       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1764       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1765       N = nrow++ - 1; high++;
1766       /* shift up all the later entries in this row */
1767       for (ii=N; ii>=i; ii--) {
1768         rp[ii+1] = rp[ii];
1769         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1770       }
1771       if (N>=i) {
1772         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1773       }
1774       rp[i]                      = bcol;
1775       ap[bs2*i + bs*cidx + ridx] = value;
1776       a->nz++;
1777       noinsert1:;
1778       low = i;
1779     }
1780     ailen[brow] = nrow;
1781   }
1782   A->same_nonzero = PETSC_FALSE;
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 
1787 #undef __FUNCT__
1788 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1789 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1790 {
1791   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1792   Mat            outA;
1793   PetscErrorCode ierr;
1794   PetscTruth     row_identity,col_identity;
1795 
1796   PetscFunctionBegin;
1797   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1798   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1799   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1800   if (!row_identity || !col_identity) {
1801     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1802   }
1803 
1804   outA          = inA;
1805   inA->factor   = FACTOR_LU;
1806 
1807   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1808 
1809   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1810   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1811   a->row = row;
1812   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1813   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1814   a->col = col;
1815 
1816   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1817   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1818   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1819 
1820   /*
1821       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1822       for ILU(0) factorization with natural ordering
1823   */
1824   if (inA->rmap.bs < 8) {
1825     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1826   } else {
1827     if (!a->solve_work) {
1828       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1829       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1830     }
1831   }
1832 
1833   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1834 
1835   PetscFunctionReturn(0);
1836 }
1837 
1838 EXTERN_C_BEGIN
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1841 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1842 {
1843   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1844   PetscInt    i,nz,nbs;
1845 
1846   PetscFunctionBegin;
1847   nz  = baij->maxnz/baij->bs2;
1848   nbs = baij->nbs;
1849   for (i=0; i<nz; i++) {
1850     baij->j[i] = indices[i];
1851   }
1852   baij->nz = nz;
1853   for (i=0; i<nbs; i++) {
1854     baij->ilen[i] = baij->imax[i];
1855   }
1856 
1857   PetscFunctionReturn(0);
1858 }
1859 EXTERN_C_END
1860 
1861 #undef __FUNCT__
1862 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1863 /*@
1864     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1865        in the matrix.
1866 
1867   Input Parameters:
1868 +  mat - the SeqBAIJ matrix
1869 -  indices - the column indices
1870 
1871   Level: advanced
1872 
1873   Notes:
1874     This can be called if you have precomputed the nonzero structure of the
1875   matrix and want to provide it to the matrix object to improve the performance
1876   of the MatSetValues() operation.
1877 
1878     You MUST have set the correct numbers of nonzeros per row in the call to
1879   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1880 
1881     MUST be called before any calls to MatSetValues();
1882 
1883 @*/
1884 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1885 {
1886   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1887 
1888   PetscFunctionBegin;
1889   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1890   PetscValidPointer(indices,2);
1891   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1892   if (f) {
1893     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1894   } else {
1895     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1896   }
1897   PetscFunctionReturn(0);
1898 }
1899 
1900 #undef __FUNCT__
1901 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
1902 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
1903 {
1904   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1905   PetscErrorCode ierr;
1906   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1907   PetscReal      atmp;
1908   PetscScalar    *x,zero = 0.0;
1909   MatScalar      *aa;
1910   PetscInt       ncols,brow,krow,kcol;
1911 
1912   PetscFunctionBegin;
1913   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1914   bs   = A->rmap.bs;
1915   aa   = a->a;
1916   ai   = a->i;
1917   aj   = a->j;
1918   mbs  = a->mbs;
1919 
1920   ierr = VecSet(v,zero);CHKERRQ(ierr);
1921   if (idx) {
1922     for (i=0; i<A->rmap.n;i++) idx[i] = 0;
1923   }
1924   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1925   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1926   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1927   for (i=0; i<mbs; i++) {
1928     ncols = ai[1] - ai[0]; ai++;
1929     brow  = bs*i;
1930     for (j=0; j<ncols; j++){
1931       for (kcol=0; kcol<bs; kcol++){
1932         for (krow=0; krow<bs; krow++){
1933           atmp = PetscAbsScalar(*aa);aa++;
1934           row = brow + krow;    /* row index */
1935           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1936           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
1937         }
1938       }
1939       aj++;
1940     }
1941   }
1942   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1943   PetscFunctionReturn(0);
1944 }
1945 
1946 #undef __FUNCT__
1947 #define __FUNCT__ "MatCopy_SeqBAIJ"
1948 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1949 {
1950   PetscErrorCode ierr;
1951 
1952   PetscFunctionBegin;
1953   /* If the two matrices have the same copy implementation, use fast copy. */
1954   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1955     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1956     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1957 
1958     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1959       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1960     }
1961     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1962   } else {
1963     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1970 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1971 {
1972   PetscErrorCode ierr;
1973 
1974   PetscFunctionBegin;
1975   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1976   PetscFunctionReturn(0);
1977 }
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1981 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1982 {
1983   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1984   PetscFunctionBegin;
1985   *array = a->a;
1986   PetscFunctionReturn(0);
1987 }
1988 
1989 #undef __FUNCT__
1990 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1991 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1992 {
1993   PetscFunctionBegin;
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #include "petscblaslapack.h"
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2000 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2001 {
2002   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2003   PetscErrorCode ierr;
2004   PetscInt       i,bs=Y->rmap.bs,j,bs2;
2005   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
2006 
2007   PetscFunctionBegin;
2008   if (str == SAME_NONZERO_PATTERN) {
2009     PetscScalar alpha = a;
2010     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2011   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2012     if (y->xtoy && y->XtoY != X) {
2013       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2014       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2015     }
2016     if (!y->xtoy) { /* get xtoy */
2017       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2018       y->XtoY = X;
2019     }
2020     bs2 = bs*bs;
2021     for (i=0; i<x->nz; i++) {
2022       j = 0;
2023       while (j < bs2){
2024         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2025         j++;
2026       }
2027     }
2028     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2029   } else {
2030     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2031   }
2032   PetscFunctionReturn(0);
2033 }
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2037 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2038 {
2039   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2040   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2041   PetscScalar    *aa = a->a;
2042 
2043   PetscFunctionBegin;
2044   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2045   PetscFunctionReturn(0);
2046 }
2047 
2048 #undef __FUNCT__
2049 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2050 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2051 {
2052   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2053   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2054   PetscScalar    *aa = a->a;
2055 
2056   PetscFunctionBegin;
2057   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2058   PetscFunctionReturn(0);
2059 }
2060 
2061 
2062 /* -------------------------------------------------------------------*/
2063 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2064        MatGetRow_SeqBAIJ,
2065        MatRestoreRow_SeqBAIJ,
2066        MatMult_SeqBAIJ_N,
2067 /* 4*/ MatMultAdd_SeqBAIJ_N,
2068        MatMultTranspose_SeqBAIJ,
2069        MatMultTransposeAdd_SeqBAIJ,
2070        MatSolve_SeqBAIJ_N,
2071        0,
2072        0,
2073 /*10*/ 0,
2074        MatLUFactor_SeqBAIJ,
2075        0,
2076        0,
2077        MatTranspose_SeqBAIJ,
2078 /*15*/ MatGetInfo_SeqBAIJ,
2079        MatEqual_SeqBAIJ,
2080        MatGetDiagonal_SeqBAIJ,
2081        MatDiagonalScale_SeqBAIJ,
2082        MatNorm_SeqBAIJ,
2083 /*20*/ 0,
2084        MatAssemblyEnd_SeqBAIJ,
2085        0,
2086        MatSetOption_SeqBAIJ,
2087        MatZeroEntries_SeqBAIJ,
2088 /*25*/ MatZeroRows_SeqBAIJ,
2089        MatLUFactorSymbolic_SeqBAIJ,
2090        MatLUFactorNumeric_SeqBAIJ_N,
2091        MatCholeskyFactorSymbolic_SeqBAIJ,
2092        MatCholeskyFactorNumeric_SeqBAIJ_N,
2093 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2094        MatILUFactorSymbolic_SeqBAIJ,
2095        MatICCFactorSymbolic_SeqBAIJ,
2096        MatGetArray_SeqBAIJ,
2097        MatRestoreArray_SeqBAIJ,
2098 /*35*/ MatDuplicate_SeqBAIJ,
2099        0,
2100        0,
2101        MatILUFactor_SeqBAIJ,
2102        0,
2103 /*40*/ MatAXPY_SeqBAIJ,
2104        MatGetSubMatrices_SeqBAIJ,
2105        MatIncreaseOverlap_SeqBAIJ,
2106        MatGetValues_SeqBAIJ,
2107        MatCopy_SeqBAIJ,
2108 /*45*/ 0,
2109        MatScale_SeqBAIJ,
2110        0,
2111        0,
2112        0,
2113 /*50*/ 0,
2114        MatGetRowIJ_SeqBAIJ,
2115        MatRestoreRowIJ_SeqBAIJ,
2116        0,
2117        0,
2118 /*55*/ 0,
2119        0,
2120        0,
2121        0,
2122        MatSetValuesBlocked_SeqBAIJ,
2123 /*60*/ MatGetSubMatrix_SeqBAIJ,
2124        MatDestroy_SeqBAIJ,
2125        MatView_SeqBAIJ,
2126        0,
2127        0,
2128 /*65*/ 0,
2129        0,
2130        0,
2131        0,
2132        0,
2133 /*70*/ MatGetRowMaxAbs_SeqBAIJ,
2134        MatConvert_Basic,
2135        0,
2136        0,
2137        0,
2138 /*75*/ 0,
2139        0,
2140        0,
2141        0,
2142        0,
2143 /*80*/ 0,
2144        0,
2145        0,
2146        0,
2147        MatLoad_SeqBAIJ,
2148 /*85*/ 0,
2149        0,
2150        0,
2151        0,
2152        0,
2153 /*90*/ 0,
2154        0,
2155        0,
2156        0,
2157        0,
2158 /*95*/ 0,
2159        0,
2160        0,
2161        0,
2162        0,
2163 /*100*/0,
2164        0,
2165        0,
2166        0,
2167        0,
2168 /*105*/0,
2169        MatRealPart_SeqBAIJ,
2170        MatImaginaryPart_SeqBAIJ,
2171        0,
2172        0,
2173 /*110*/0,
2174        0,
2175        0,
2176        0,
2177        MatMissingDiagonal_SeqBAIJ
2178 /*115*/
2179 };
2180 
2181 EXTERN_C_BEGIN
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2184 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2185 {
2186   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2187   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2188   PetscErrorCode ierr;
2189 
2190   PetscFunctionBegin;
2191   if (aij->nonew != 1) {
2192     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2193   }
2194 
2195   /* allocate space for values if not already there */
2196   if (!aij->saved_values) {
2197     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2198   }
2199 
2200   /* copy values over */
2201   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2202   PetscFunctionReturn(0);
2203 }
2204 EXTERN_C_END
2205 
2206 EXTERN_C_BEGIN
2207 #undef __FUNCT__
2208 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2209 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2210 {
2211   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2212   PetscErrorCode ierr;
2213   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2214 
2215   PetscFunctionBegin;
2216   if (aij->nonew != 1) {
2217     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2218   }
2219   if (!aij->saved_values) {
2220     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2221   }
2222 
2223   /* copy values over */
2224   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2225   PetscFunctionReturn(0);
2226 }
2227 EXTERN_C_END
2228 
2229 EXTERN_C_BEGIN
2230 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2231 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2232 EXTERN_C_END
2233 
2234 EXTERN_C_BEGIN
2235 #undef __FUNCT__
2236 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2237 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2238 {
2239   Mat_SeqBAIJ    *b;
2240   PetscErrorCode ierr;
2241   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2242   PetscTruth     flg,skipallocation = PETSC_FALSE;
2243 
2244   PetscFunctionBegin;
2245 
2246   if (nz == MAT_SKIP_ALLOCATION) {
2247     skipallocation = PETSC_TRUE;
2248     nz             = 0;
2249   }
2250 
2251   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2252     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2253   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2254 
2255   if (nnz && newbs != bs) {
2256     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2257   }
2258   bs   = newbs;
2259 
2260   B->rmap.bs = B->cmap.bs = bs;
2261   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
2262   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
2263 
2264   B->preallocated = PETSC_TRUE;
2265 
2266   mbs  = B->rmap.n/bs;
2267   nbs  = B->cmap.n/bs;
2268   bs2  = bs*bs;
2269 
2270   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2271     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2272   }
2273 
2274   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2275   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2276   if (nnz) {
2277     for (i=0; i<mbs; i++) {
2278       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2279       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2280     }
2281   }
2282 
2283   b       = (Mat_SeqBAIJ*)B->data;
2284   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2285     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2286   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2287 
2288   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2289   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2290   if (!flg) {
2291     switch (bs) {
2292     case 1:
2293       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2294       B->ops->mult            = MatMult_SeqBAIJ_1;
2295       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2296       break;
2297     case 2:
2298       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2299       B->ops->mult            = MatMult_SeqBAIJ_2;
2300       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2301       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2302       break;
2303     case 3:
2304       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2305       B->ops->mult            = MatMult_SeqBAIJ_3;
2306       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2307       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2308       break;
2309     case 4:
2310       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2311       B->ops->mult            = MatMult_SeqBAIJ_4;
2312       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2313       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2314       break;
2315     case 5:
2316       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2317       B->ops->mult            = MatMult_SeqBAIJ_5;
2318       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2319       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2320       break;
2321     case 6:
2322       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2323       B->ops->mult            = MatMult_SeqBAIJ_6;
2324       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2325       break;
2326     case 7:
2327       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2328       B->ops->mult            = MatMult_SeqBAIJ_7;
2329       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2330       break;
2331     default:
2332       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2333       B->ops->mult            = MatMult_SeqBAIJ_N;
2334       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2335       break;
2336     }
2337   }
2338   B->rmap.bs      = bs;
2339   b->mbs     = mbs;
2340   b->nbs     = nbs;
2341   if (!skipallocation) {
2342     if (!b->imax) {
2343       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2344     }
2345     /* b->ilen will count nonzeros in each block row so far. */
2346     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2347     if (!nnz) {
2348       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2349       else if (nz <= 0)        nz = 1;
2350       for (i=0; i<mbs; i++) b->imax[i] = nz;
2351       nz = nz*mbs;
2352     } else {
2353       nz = 0;
2354       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2355     }
2356 
2357     /* allocate the matrix space */
2358     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2359     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2360     ierr = PetscLogObjectMemory(B,(B->rmap.N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2361     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2362     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2363     b->singlemalloc = PETSC_TRUE;
2364     b->i[0] = 0;
2365     for (i=1; i<mbs+1; i++) {
2366       b->i[i] = b->i[i-1] + b->imax[i-1];
2367     }
2368     b->free_a     = PETSC_TRUE;
2369     b->free_ij    = PETSC_TRUE;
2370   } else {
2371     b->free_a     = PETSC_FALSE;
2372     b->free_ij    = PETSC_FALSE;
2373   }
2374 
2375   B->rmap.bs          = bs;
2376   b->bs2              = bs2;
2377   b->mbs              = mbs;
2378   b->nz               = 0;
2379   b->maxnz            = nz*bs2;
2380   B->info.nz_unneeded = (PetscReal)b->maxnz;
2381   PetscFunctionReturn(0);
2382 }
2383 EXTERN_C_END
2384 
2385 /*MC
2386    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2387    block sparse compressed row format.
2388 
2389    Options Database Keys:
2390 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2391 
2392   Level: beginner
2393 
2394 .seealso: MatCreateSeqBAIJ()
2395 M*/
2396 
2397 EXTERN_C_BEGIN
2398 #undef __FUNCT__
2399 #define __FUNCT__ "MatCreate_SeqBAIJ"
2400 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2401 {
2402   PetscErrorCode ierr;
2403   PetscMPIInt    size;
2404   Mat_SeqBAIJ    *b;
2405 
2406   PetscFunctionBegin;
2407   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2408   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2409 
2410   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2411   B->data = (void*)b;
2412   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2413   B->factor           = 0;
2414   B->mapping          = 0;
2415   b->row              = 0;
2416   b->col              = 0;
2417   b->icol             = 0;
2418   b->reallocs         = 0;
2419   b->saved_values     = 0;
2420 #if defined(PETSC_USE_MAT_SINGLE)
2421   b->setvalueslen     = 0;
2422   b->setvaluescopy    = PETSC_NULL;
2423 #endif
2424 
2425   b->roworiented      = PETSC_TRUE;
2426   b->nonew            = 0;
2427   b->diag             = 0;
2428   b->solve_work       = 0;
2429   b->mult_work        = 0;
2430   B->spptr            = 0;
2431   B->info.nz_unneeded = (PetscReal)b->maxnz;
2432   b->keepzeroedrows   = PETSC_FALSE;
2433   b->xtoy              = 0;
2434   b->XtoY              = 0;
2435   b->compressedrow.use     = PETSC_FALSE;
2436   b->compressedrow.nrows   = 0;
2437   b->compressedrow.i       = PETSC_NULL;
2438   b->compressedrow.rindex  = PETSC_NULL;
2439   b->compressedrow.checked = PETSC_FALSE;
2440   B->same_nonzero          = PETSC_FALSE;
2441 
2442   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2443                                      "MatInvertBlockDiagonal_SeqBAIJ",
2444                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2445   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2446                                      "MatStoreValues_SeqBAIJ",
2447                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2448   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2449                                      "MatRetrieveValues_SeqBAIJ",
2450                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2451   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2452                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2453                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2454   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2455                                      "MatConvert_SeqBAIJ_SeqAIJ",
2456                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2457   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2458                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2459                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2460   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2461                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2462                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2463   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2464   PetscFunctionReturn(0);
2465 }
2466 EXTERN_C_END
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2470 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2471 {
2472   Mat            C;
2473   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2474   PetscErrorCode ierr;
2475   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2476 
2477   PetscFunctionBegin;
2478   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2479 
2480   *B = 0;
2481   ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
2482   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2483   ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
2484   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2485   c    = (Mat_SeqBAIJ*)C->data;
2486 
2487   C->rmap.N   = A->rmap.N;
2488   C->cmap.N   = A->cmap.N;
2489   C->rmap.bs  = A->rmap.bs;
2490   c->bs2 = a->bs2;
2491   c->mbs = a->mbs;
2492   c->nbs = a->nbs;
2493 
2494   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2495   for (i=0; i<mbs; i++) {
2496     c->imax[i] = a->imax[i];
2497     c->ilen[i] = a->ilen[i];
2498   }
2499 
2500   /* allocate the matrix space */
2501   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2502   c->singlemalloc = PETSC_TRUE;
2503   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2504   if (mbs > 0) {
2505     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2506     if (cpvalues == MAT_COPY_VALUES) {
2507       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2508     } else {
2509       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2510     }
2511   }
2512   c->roworiented = a->roworiented;
2513   c->nonew       = a->nonew;
2514 
2515   if (a->diag) {
2516     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2517     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2518     for (i=0; i<mbs; i++) {
2519       c->diag[i] = a->diag[i];
2520     }
2521   } else c->diag        = 0;
2522   c->nz                 = a->nz;
2523   c->maxnz              = a->maxnz;
2524   c->solve_work         = 0;
2525   c->mult_work          = 0;
2526   c->free_a             = PETSC_TRUE;
2527   c->free_ij            = PETSC_TRUE;
2528   C->preallocated       = PETSC_TRUE;
2529   C->assembled          = PETSC_TRUE;
2530 
2531   c->compressedrow.use     = a->compressedrow.use;
2532   c->compressedrow.nrows   = a->compressedrow.nrows;
2533   c->compressedrow.checked = a->compressedrow.checked;
2534   if ( a->compressedrow.checked && a->compressedrow.use){
2535     i = a->compressedrow.nrows;
2536     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2537     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2538     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2539     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2540   } else {
2541     c->compressedrow.use    = PETSC_FALSE;
2542     c->compressedrow.i      = PETSC_NULL;
2543     c->compressedrow.rindex = PETSC_NULL;
2544   }
2545   C->same_nonzero = A->same_nonzero;
2546   *B = C;
2547   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2548   PetscFunctionReturn(0);
2549 }
2550 
2551 #undef __FUNCT__
2552 #define __FUNCT__ "MatLoad_SeqBAIJ"
2553 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2554 {
2555   Mat_SeqBAIJ    *a;
2556   Mat            B;
2557   PetscErrorCode ierr;
2558   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2559   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2560   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2561   PetscInt       *masked,nmask,tmp,bs2,ishift;
2562   PetscMPIInt    size;
2563   int            fd;
2564   PetscScalar    *aa;
2565   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2566 
2567   PetscFunctionBegin;
2568   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2569     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2570   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2571   bs2  = bs*bs;
2572 
2573   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2574   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2575   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2576   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2577   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2578   M = header[1]; N = header[2]; nz = header[3];
2579 
2580   if (header[3] < 0) {
2581     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2582   }
2583 
2584   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2585 
2586   /*
2587      This code adds extra rows to make sure the number of rows is
2588     divisible by the blocksize
2589   */
2590   mbs        = M/bs;
2591   extra_rows = bs - M + bs*(mbs);
2592   if (extra_rows == bs) extra_rows = 0;
2593   else                  mbs++;
2594   if (extra_rows) {
2595     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2596   }
2597 
2598   /* read in row lengths */
2599   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2600   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2601   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2602 
2603   /* read in column indices */
2604   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2605   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2606   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2607 
2608   /* loop over row lengths determining block row lengths */
2609   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2610   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2611   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2612   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2613   masked   = mask + mbs;
2614   rowcount = 0; nzcount = 0;
2615   for (i=0; i<mbs; i++) {
2616     nmask = 0;
2617     for (j=0; j<bs; j++) {
2618       kmax = rowlengths[rowcount];
2619       for (k=0; k<kmax; k++) {
2620         tmp = jj[nzcount++]/bs;
2621         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2622       }
2623       rowcount++;
2624     }
2625     browlengths[i] += nmask;
2626     /* zero out the mask elements we set */
2627     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2628   }
2629 
2630   /* create our matrix */
2631   ierr = MatCreate(comm,&B);
2632   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2633   ierr = MatSetType(B,type);CHKERRQ(ierr);
2634   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2635   a = (Mat_SeqBAIJ*)B->data;
2636 
2637   /* set matrix "i" values */
2638   a->i[0] = 0;
2639   for (i=1; i<= mbs; i++) {
2640     a->i[i]      = a->i[i-1] + browlengths[i-1];
2641     a->ilen[i-1] = browlengths[i-1];
2642   }
2643   a->nz         = 0;
2644   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2645 
2646   /* read in nonzero values */
2647   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2648   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2649   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2650 
2651   /* set "a" and "j" values into matrix */
2652   nzcount = 0; jcount = 0;
2653   for (i=0; i<mbs; i++) {
2654     nzcountb = nzcount;
2655     nmask    = 0;
2656     for (j=0; j<bs; j++) {
2657       kmax = rowlengths[i*bs+j];
2658       for (k=0; k<kmax; k++) {
2659         tmp = jj[nzcount++]/bs;
2660 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2661       }
2662     }
2663     /* sort the masked values */
2664     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2665 
2666     /* set "j" values into matrix */
2667     maskcount = 1;
2668     for (j=0; j<nmask; j++) {
2669       a->j[jcount++]  = masked[j];
2670       mask[masked[j]] = maskcount++;
2671     }
2672     /* set "a" values into matrix */
2673     ishift = bs2*a->i[i];
2674     for (j=0; j<bs; j++) {
2675       kmax = rowlengths[i*bs+j];
2676       for (k=0; k<kmax; k++) {
2677         tmp       = jj[nzcountb]/bs ;
2678         block     = mask[tmp] - 1;
2679         point     = jj[nzcountb] - bs*tmp;
2680         idx       = ishift + bs2*block + j + bs*point;
2681         a->a[idx] = (MatScalar)aa[nzcountb++];
2682       }
2683     }
2684     /* zero out the mask elements we set */
2685     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2686   }
2687   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2688 
2689   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2690   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2691   ierr = PetscFree(aa);CHKERRQ(ierr);
2692   ierr = PetscFree(jj);CHKERRQ(ierr);
2693   ierr = PetscFree(mask);CHKERRQ(ierr);
2694 
2695   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2696   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2697   ierr = MatView_Private(B);CHKERRQ(ierr);
2698 
2699   *A = B;
2700   PetscFunctionReturn(0);
2701 }
2702 
2703 #undef __FUNCT__
2704 #define __FUNCT__ "MatCreateSeqBAIJ"
2705 /*@C
2706    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2707    compressed row) format.  For good matrix assembly performance the
2708    user should preallocate the matrix storage by setting the parameter nz
2709    (or the array nnz).  By setting these parameters accurately, performance
2710    during matrix assembly can be increased by more than a factor of 50.
2711 
2712    Collective on MPI_Comm
2713 
2714    Input Parameters:
2715 +  comm - MPI communicator, set to PETSC_COMM_SELF
2716 .  bs - size of block
2717 .  m - number of rows
2718 .  n - number of columns
2719 .  nz - number of nonzero blocks  per block row (same for all rows)
2720 -  nnz - array containing the number of nonzero blocks in the various block rows
2721          (possibly different for each block row) or PETSC_NULL
2722 
2723    Output Parameter:
2724 .  A - the matrix
2725 
2726    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2727    MatXXXXSetPreallocation() paradgm instead of this routine directly. This is definitely
2728    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
2729    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2730 
2731    Options Database Keys:
2732 .   -mat_no_unroll - uses code that does not unroll the loops in the
2733                      block calculations (much slower)
2734 .    -mat_block_size - size of the blocks to use
2735 
2736    Level: intermediate
2737 
2738    Notes:
2739    The number of rows and columns must be divisible by blocksize.
2740 
2741    If the nnz parameter is given then the nz parameter is ignored
2742 
2743    A nonzero block is any block that as 1 or more nonzeros in it
2744 
2745    The block AIJ format is fully compatible with standard Fortran 77
2746    storage.  That is, the stored row and column indices can begin at
2747    either one (as in Fortran) or zero.  See the users' manual for details.
2748 
2749    Specify the preallocated storage with either nz or nnz (not both).
2750    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2751    allocation.  For additional details, see the users manual chapter on
2752    matrices.
2753 
2754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2755 @*/
2756 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2757 {
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2762   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2763   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2764   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2770 /*@C
2771    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2772    per row in the matrix. For good matrix assembly performance the
2773    user should preallocate the matrix storage by setting the parameter nz
2774    (or the array nnz).  By setting these parameters accurately, performance
2775    during matrix assembly can be increased by more than a factor of 50.
2776 
2777    Collective on MPI_Comm
2778 
2779    Input Parameters:
2780 +  A - the matrix
2781 .  bs - size of block
2782 .  nz - number of block nonzeros per block row (same for all rows)
2783 -  nnz - array containing the number of block nonzeros in the various block rows
2784          (possibly different for each block row) or PETSC_NULL
2785 
2786    Options Database Keys:
2787 .   -mat_no_unroll - uses code that does not unroll the loops in the
2788                      block calculations (much slower)
2789 .    -mat_block_size - size of the blocks to use
2790 
2791    Level: intermediate
2792 
2793    Notes:
2794    If the nnz parameter is given then the nz parameter is ignored
2795 
2796    You can call MatGetInfo() to get information on how effective the preallocation was;
2797    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
2798    You can also run with the option -info and look for messages with the string
2799    malloc in them to see if additional memory allocation was needed.
2800 
2801    The block AIJ format is fully compatible with standard Fortran 77
2802    storage.  That is, the stored row and column indices can begin at
2803    either one (as in Fortran) or zero.  See the users' manual for details.
2804 
2805    Specify the preallocated storage with either nz or nnz (not both).
2806    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2807    allocation.  For additional details, see the users manual chapter on
2808    matrices.
2809 
2810 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
2811 @*/
2812 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2813 {
2814   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2815 
2816   PetscFunctionBegin;
2817   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2818   if (f) {
2819     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2820   }
2821   PetscFunctionReturn(0);
2822 }
2823 
2824 #undef __FUNCT__
2825 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2826 /*@
2827      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2828               (upper triangular entries in CSR format) provided by the user.
2829 
2830      Collective on MPI_Comm
2831 
2832    Input Parameters:
2833 +  comm - must be an MPI communicator of size 1
2834 .  bs - size of block
2835 .  m - number of rows
2836 .  n - number of columns
2837 .  i - row indices
2838 .  j - column indices
2839 -  a - matrix values
2840 
2841    Output Parameter:
2842 .  mat - the matrix
2843 
2844    Level: intermediate
2845 
2846    Notes:
2847        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2848     once the matrix is destroyed
2849 
2850        You cannot set new nonzero locations into this matrix, that will generate an error.
2851 
2852        The i and j indices are 0 based
2853 
2854 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2855 
2856 @*/
2857 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2858 {
2859   PetscErrorCode ierr;
2860   PetscInt       ii;
2861   Mat_SeqBAIJ    *baij;
2862 
2863   PetscFunctionBegin;
2864   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2865   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2866 
2867   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2868   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2869   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2870   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2871   baij = (Mat_SeqBAIJ*)(*mat)->data;
2872   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2873 
2874   baij->i = i;
2875   baij->j = j;
2876   baij->a = a;
2877   baij->singlemalloc = PETSC_FALSE;
2878   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2879   baij->free_a       = PETSC_FALSE;
2880   baij->free_ij       = PETSC_FALSE;
2881 
2882   for (ii=0; ii<m; ii++) {
2883     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2884 #if defined(PETSC_USE_DEBUG)
2885     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2886 #endif
2887   }
2888 #if defined(PETSC_USE_DEBUG)
2889   for (ii=0; ii<baij->i[m]; ii++) {
2890     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2891     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2892   }
2893 #endif
2894 
2895   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2897   PetscFunctionReturn(0);
2898 }
2899