xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4c8c32f384357201972f9dd67b0f2e901704fd35)
1 /*
2     Defines the basic matrix operations for the BAIJ (compressed row)
3   matrix storage format.
4 */
5 #include "src/mat/impls/baij/seq/baij.h"
6 #include "src/inline/spops.h"
7 #include "petscsys.h"                     /*I "petscmat.h" I*/
8 
9 #include "src/inline/ilu.h"
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A)
14 {
15   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   int         *diag_offset,i,bs = a->bs,mbs = a->mbs;
18   PetscScalar *v = a->a,*odiag,*diag,*mdiag;
19 
20   PetscFunctionBegin;
21   if (a->idiagvalid) PetscFunctionReturn(0);
22   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
23   diag_offset = a->diag;
24   if (!a->idiag) {
25     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
26   }
27   diag  = a->idiag;
28   mdiag = a->idiag+bs*bs*mbs;
29   /* factor and invert each block */
30   switch (a->bs){
31     case 2:
32       for (i=0; i<mbs; i++) {
33         odiag   = v + 4*diag_offset[i];
34         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
35 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
36 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
37 	diag    += 4;
38 	mdiag   += 4;
39       }
40       break;
41     case 3:
42       for (i=0; i<mbs; i++) {
43         odiag    = v + 9*diag_offset[i];
44         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
45         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
46         diag[8]  = odiag[8];
47         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
48         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
49         mdiag[8] = odiag[8];
50 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
51 	diag    += 9;
52 	mdiag   += 9;
53       }
54       break;
55     case 4:
56       for (i=0; i<mbs; i++) {
57         odiag  = v + 16*diag_offset[i];
58         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
59         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
61 	diag  += 16;
62 	mdiag += 16;
63       }
64       break;
65     case 5:
66       for (i=0; i<mbs; i++) {
67         odiag = v + 25*diag_offset[i];
68         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
69         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
71 	diag  += 25;
72 	mdiag += 25;
73       }
74       break;
75     default:
76       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %d",a->bs);
77   }
78   a->idiagvalid = PETSC_TRUE;
79   PetscFunctionReturn(0);
80 }
81 
82 #undef __FUNCT__
83 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
84 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
85 {
86   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
87   PetscScalar        *x,x1,x2,s1,s2;
88   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
89   PetscErrorCode ierr;
90   int                m = a->mbs,i,i2,nz,idx;
91   const int          *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   PetscErrorCode ierr;
192   int                m = a->mbs,i,i2,nz,idx;
193   const int          *diag,*ai = a->i,*aj = a->j,*vi;
194 
195   PetscFunctionBegin;
196   its = its*lits;
197   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
198   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
199   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
200   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
201   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
202 
203   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
204 
205   diag  = a->diag;
206   idiag = a->idiag;
207   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
208   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
209 
210   if (flag & SOR_ZERO_INITIAL_GUESS) {
211     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
212       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
213       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
214       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
215       i2     = 3;
216       idiag += 9;
217       for (i=1; i<m; i++) {
218 	v     = aa + 9*ai[i];
219 	vi    = aj + ai[i];
220 	nz    = diag[i] - ai[i];
221 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
222 	while (nz--) {
223 	  idx  = 3*(*vi++);
224 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
225 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
226 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
227 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
228 	  v   += 9;
229 	}
230 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
231 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
232 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
233         idiag   += 9;
234         i2      += 3;
235       }
236       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
237       PetscLogFlops(9*(a->nz));
238     }
239     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
240         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
241       i2    = 0;
242       mdiag = a->idiag+9*a->mbs;
243       for (i=0; i<m; i++) {
244         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
245         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
246         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
247         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
248         mdiag  += 9;
249         i2     += 3;
250       }
251       PetscLogFlops(15*m);
252     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
253       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
254     }
255     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
256       idiag   = a->idiag+9*a->mbs - 9;
257       i2      = 3*m - 3;
258       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
259       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
260       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
261       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
262       idiag -= 9;
263       i2    -= 3;
264       for (i=m-2; i>=0; i--) {
265 	v     = aa + 9*(diag[i]+1);
266 	vi    = aj + diag[i] + 1;
267 	nz    = ai[i+1] - diag[i] - 1;
268 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
269 	while (nz--) {
270 	  idx  = 3*(*vi++);
271 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
272 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
273 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
274 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
275 	  v   += 9;
276 	}
277 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
278 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
279 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
280         idiag   -= 9;
281         i2      -= 3;
282       }
283       PetscLogFlops(9*(a->nz));
284     }
285   } else {
286     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
287   }
288   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
289   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
295 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
296 {
297   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
298   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
299   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
300   PetscErrorCode ierr;
301   int                m = a->mbs,i,i2,nz,idx;
302   const int          *diag,*ai = a->i,*aj = a->j,*vi;
303 
304   PetscFunctionBegin;
305   its = its*lits;
306   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
307   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
308   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
309   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
310   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
311 
312   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
313 
314   diag  = a->diag;
315   idiag = a->idiag;
316   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
317   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
318 
319   if (flag & SOR_ZERO_INITIAL_GUESS) {
320     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
321       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
322       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
323       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
324       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
325       i2     = 4;
326       idiag += 16;
327       for (i=1; i<m; i++) {
328 	v     = aa + 16*ai[i];
329 	vi    = aj + ai[i];
330 	nz    = diag[i] - ai[i];
331 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
332 	while (nz--) {
333 	  idx  = 4*(*vi++);
334 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
335 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
336 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
337 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
338 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
339 	  v   += 16;
340 	}
341 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
342 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
343 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
344 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
345         idiag   += 16;
346         i2      += 4;
347       }
348       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
349       PetscLogFlops(16*(a->nz));
350     }
351     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
352         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
353       i2    = 0;
354       mdiag = a->idiag+16*a->mbs;
355       for (i=0; i<m; i++) {
356         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
357         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
358         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
359         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
360         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
361         mdiag  += 16;
362         i2     += 4;
363       }
364       PetscLogFlops(28*m);
365     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
366       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
367     }
368     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
369       idiag   = a->idiag+16*a->mbs - 16;
370       i2      = 4*m - 4;
371       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
372       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
373       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
374       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
375       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
376       idiag -= 16;
377       i2    -= 4;
378       for (i=m-2; i>=0; i--) {
379 	v     = aa + 16*(diag[i]+1);
380 	vi    = aj + diag[i] + 1;
381 	nz    = ai[i+1] - diag[i] - 1;
382 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
383 	while (nz--) {
384 	  idx  = 4*(*vi++);
385 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
386 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
387 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
388 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
389 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
390 	  v   += 16;
391 	}
392 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
393 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
394 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
395 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
396         idiag   -= 16;
397         i2      -= 4;
398       }
399       PetscLogFlops(16*(a->nz));
400     }
401   } else {
402     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
403   }
404   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
405   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
411 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
412 {
413   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
414   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
415   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
416   PetscErrorCode ierr;
417   int                m = a->mbs,i,i2,nz,idx;
418   const int          *diag,*ai = a->i,*aj = a->j,*vi;
419 
420   PetscFunctionBegin;
421   its = its*lits;
422   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
423   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
424   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
425   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
426   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
427 
428   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
429 
430   diag  = a->diag;
431   idiag = a->idiag;
432   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
433   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
434 
435   if (flag & SOR_ZERO_INITIAL_GUESS) {
436     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
437       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
438       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
439       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
440       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
441       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
442       i2     = 5;
443       idiag += 25;
444       for (i=1; i<m; i++) {
445 	v     = aa + 25*ai[i];
446 	vi    = aj + ai[i];
447 	nz    = diag[i] - ai[i];
448 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
449 	while (nz--) {
450 	  idx  = 5*(*vi++);
451 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
452 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
453 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
454 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
455 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
456 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
457 	  v   += 25;
458 	}
459 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
460 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
461 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
462 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
463 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
464         idiag   += 25;
465         i2      += 5;
466       }
467       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
468       PetscLogFlops(25*(a->nz));
469     }
470     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
471         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
472       i2    = 0;
473       mdiag = a->idiag+25*a->mbs;
474       for (i=0; i<m; i++) {
475         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
476         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
477         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
478         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
479         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
480         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
481         mdiag  += 25;
482         i2     += 5;
483       }
484       PetscLogFlops(45*m);
485     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
486       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
487     }
488     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
489       idiag   = a->idiag+25*a->mbs - 25;
490       i2      = 5*m - 5;
491       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
492       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
493       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
494       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
495       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
496       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
497       idiag -= 25;
498       i2    -= 5;
499       for (i=m-2; i>=0; i--) {
500 	v     = aa + 25*(diag[i]+1);
501 	vi    = aj + diag[i] + 1;
502 	nz    = ai[i+1] - diag[i] - 1;
503 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
504 	while (nz--) {
505 	  idx  = 5*(*vi++);
506 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
507 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
508 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
509 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
510 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
511 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
512 	  v   += 25;
513 	}
514 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
515 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
516 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
517 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
518 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
519         idiag   -= 25;
520         i2      -= 5;
521       }
522       PetscLogFlops(25*(a->nz));
523     }
524   } else {
525     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
526   }
527   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
528   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
529   PetscFunctionReturn(0);
530 }
531 
532 /*
533     Special version for Fun3d sequential benchmark
534 */
535 #if defined(PETSC_HAVE_FORTRAN_CAPS)
536 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
537 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
538 #define matsetvaluesblocked4_ matsetvaluesblocked4
539 #endif
540 
541 EXTERN_C_BEGIN
542 #undef __FUNCT__
543 #define __FUNCT__ "matsetvaluesblocked4_"
544 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
545 {
546   Mat               A = *AA;
547   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
548   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
549   int               *ai=a->i,*ailen=a->ilen;
550   int               *aj=a->j,stepval;
551   const PetscScalar *value = v;
552   MatScalar         *ap,*aa = a->a,*bap;
553 
554   PetscFunctionBegin;
555   stepval = (n-1)*4;
556   for (k=0; k<m; k++) { /* loop over added rows */
557     row  = im[k];
558     rp   = aj + ai[row];
559     ap   = aa + 16*ai[row];
560     nrow = ailen[row];
561     low  = 0;
562     for (l=0; l<n; l++) { /* loop over added columns */
563       col = in[l];
564       value = v + k*(stepval+4)*4 + l*4;
565       low = 0; high = nrow;
566       while (high-low > 7) {
567         t = (low+high)/2;
568         if (rp[t] > col) high = t;
569         else             low  = t;
570       }
571       for (i=low; i<high; i++) {
572         if (rp[i] > col) break;
573         if (rp[i] == col) {
574           bap  = ap +  16*i;
575           for (ii=0; ii<4; ii++,value+=stepval) {
576             for (jj=ii; jj<16; jj+=4) {
577               bap[jj] += *value++;
578             }
579           }
580           goto noinsert2;
581         }
582       }
583       N = nrow++ - 1;
584       /* shift up all the later entries in this row */
585       for (ii=N; ii>=i; ii--) {
586         rp[ii+1] = rp[ii];
587         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
588       }
589       if (N >= i) {
590         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
591       }
592       rp[i] = col;
593       bap   = ap +  16*i;
594       for (ii=0; ii<4; ii++,value+=stepval) {
595         for (jj=ii; jj<16; jj+=4) {
596           bap[jj] = *value++;
597         }
598       }
599       noinsert2:;
600       low = i;
601     }
602     ailen[row] = nrow;
603   }
604 }
605 EXTERN_C_END
606 
607 #if defined(PETSC_HAVE_FORTRAN_CAPS)
608 #define matsetvalues4_ MATSETVALUES4
609 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
610 #define matsetvalues4_ matsetvalues4
611 #endif
612 
613 EXTERN_C_BEGIN
614 #undef __FUNCT__
615 #define __FUNCT__ "MatSetValues4_"
616 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
617 {
618   Mat         A = *AA;
619   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
620   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
621   int         *ai=a->i,*ailen=a->ilen;
622   int         *aj=a->j,brow,bcol;
623   int         ridx,cidx;
624   MatScalar   *ap,value,*aa=a->a,*bap;
625 
626   PetscFunctionBegin;
627   for (k=0; k<m; k++) { /* loop over added rows */
628     row  = im[k]; brow = row/4;
629     rp   = aj + ai[brow];
630     ap   = aa + 16*ai[brow];
631     nrow = ailen[brow];
632     low  = 0;
633     for (l=0; l<n; l++) { /* loop over added columns */
634       col = in[l]; bcol = col/4;
635       ridx = row % 4; cidx = col % 4;
636       value = v[l + k*n];
637       low = 0; high = nrow;
638       while (high-low > 7) {
639         t = (low+high)/2;
640         if (rp[t] > bcol) high = t;
641         else              low  = t;
642       }
643       for (i=low; i<high; i++) {
644         if (rp[i] > bcol) break;
645         if (rp[i] == bcol) {
646           bap  = ap +  16*i + 4*cidx + ridx;
647           *bap += value;
648           goto noinsert1;
649         }
650       }
651       N = nrow++ - 1;
652       /* shift up all the later entries in this row */
653       for (ii=N; ii>=i; ii--) {
654         rp[ii+1] = rp[ii];
655         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
656       }
657       if (N>=i) {
658         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
659       }
660       rp[i]                    = bcol;
661       ap[16*i + 4*cidx + ridx] = value;
662       noinsert1:;
663       low = i;
664     }
665     ailen[brow] = nrow;
666   }
667 }
668 EXTERN_C_END
669 
670 /*  UGLY, ugly, ugly
671    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
672    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
673    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
674    converts the entries into single precision and then calls ..._MatScalar() to put them
675    into the single precision data structures.
676 */
677 #if defined(PETSC_USE_MAT_SINGLE)
678 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
679 #else
680 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
681 #endif
682 
683 #define CHUNKSIZE  10
684 
685 /*
686      Checks for missing diagonals
687 */
688 #undef __FUNCT__
689 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
690 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
691 {
692   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
693   PetscErrorCode ierr;
694   int         *diag,*jj = a->j,i;
695 
696   PetscFunctionBegin;
697   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
698   diag = a->diag;
699   for (i=0; i<a->mbs; i++) {
700     if (jj[diag[i]] != i) {
701       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %d",i);
702     }
703   }
704   PetscFunctionReturn(0);
705 }
706 
707 #undef __FUNCT__
708 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
709 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
710 {
711   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
712   PetscErrorCode ierr;
713   int         i,j,*diag,m = a->mbs;
714 
715   PetscFunctionBegin;
716   if (a->diag) PetscFunctionReturn(0);
717 
718   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
719   PetscLogObjectMemory(A,(m+1)*sizeof(int));
720   for (i=0; i<m; i++) {
721     diag[i] = a->i[i+1];
722     for (j=a->i[i]; j<a->i[i+1]; j++) {
723       if (a->j[j] == i) {
724         diag[i] = j;
725         break;
726       }
727     }
728   }
729   a->diag = diag;
730   PetscFunctionReturn(0);
731 }
732 
733 
734 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
735 
736 #undef __FUNCT__
737 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
738 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
739 {
740   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
741   PetscErrorCode ierr;
742   int         n = a->mbs,i;
743 
744   PetscFunctionBegin;
745   *nn = n;
746   if (!ia) PetscFunctionReturn(0);
747   if (symmetric) {
748     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
749   } else if (oshift == 1) {
750     /* temporarily add 1 to i and j indices */
751     int nz = a->i[n];
752     for (i=0; i<nz; i++) a->j[i]++;
753     for (i=0; i<n+1; i++) a->i[i]++;
754     *ia = a->i; *ja = a->j;
755   } else {
756     *ia = a->i; *ja = a->j;
757   }
758 
759   PetscFunctionReturn(0);
760 }
761 
762 #undef __FUNCT__
763 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
764 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
765 {
766   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
767   PetscErrorCode ierr;
768   int         i,n = a->mbs;
769 
770   PetscFunctionBegin;
771   if (!ia) PetscFunctionReturn(0);
772   if (symmetric) {
773     ierr = PetscFree(*ia);CHKERRQ(ierr);
774     ierr = PetscFree(*ja);CHKERRQ(ierr);
775   } else if (oshift == 1) {
776     int nz = a->i[n]-1;
777     for (i=0; i<nz; i++) a->j[i]--;
778     for (i=0; i<n+1; i++) a->i[i]--;
779   }
780   PetscFunctionReturn(0);
781 }
782 
783 #undef __FUNCT__
784 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
785 PetscErrorCode MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
786 {
787   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
788 
789   PetscFunctionBegin;
790   *bs = baij->bs;
791   PetscFunctionReturn(0);
792 }
793 
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatDestroy_SeqBAIJ"
797 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
798 {
799   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
800   PetscErrorCode ierr;
801 
802   PetscFunctionBegin;
803 #if defined(PETSC_USE_LOG)
804   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
805 #endif
806   ierr = PetscFree(a->a);CHKERRQ(ierr);
807   if (!a->singlemalloc) {
808     ierr = PetscFree(a->i);CHKERRQ(ierr);
809     ierr = PetscFree(a->j);CHKERRQ(ierr);
810   }
811   if (a->row) {
812     ierr = ISDestroy(a->row);CHKERRQ(ierr);
813   }
814   if (a->col) {
815     ierr = ISDestroy(a->col);CHKERRQ(ierr);
816   }
817   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
818   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
819   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
820   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
821   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
822   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
823   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
824   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
825 #if defined(PETSC_USE_MAT_SINGLE)
826   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
827 #endif
828   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
829 
830   ierr = PetscFree(a);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatSetOption_SeqBAIJ"
836 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
837 {
838   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
839 
840   PetscFunctionBegin;
841   switch (op) {
842   case MAT_ROW_ORIENTED:
843     a->roworiented    = PETSC_TRUE;
844     break;
845   case MAT_COLUMN_ORIENTED:
846     a->roworiented    = PETSC_FALSE;
847     break;
848   case MAT_COLUMNS_SORTED:
849     a->sorted         = PETSC_TRUE;
850     break;
851   case MAT_COLUMNS_UNSORTED:
852     a->sorted         = PETSC_FALSE;
853     break;
854   case MAT_KEEP_ZEROED_ROWS:
855     a->keepzeroedrows = PETSC_TRUE;
856     break;
857   case MAT_NO_NEW_NONZERO_LOCATIONS:
858     a->nonew          = 1;
859     break;
860   case MAT_NEW_NONZERO_LOCATION_ERR:
861     a->nonew          = -1;
862     break;
863   case MAT_NEW_NONZERO_ALLOCATION_ERR:
864     a->nonew          = -2;
865     break;
866   case MAT_YES_NEW_NONZERO_LOCATIONS:
867     a->nonew          = 0;
868     break;
869   case MAT_ROWS_SORTED:
870   case MAT_ROWS_UNSORTED:
871   case MAT_YES_NEW_DIAGONALS:
872   case MAT_IGNORE_OFF_PROC_ENTRIES:
873   case MAT_USE_HASH_TABLE:
874     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
875     break;
876   case MAT_NO_NEW_DIAGONALS:
877     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
878   case MAT_SYMMETRIC:
879   case MAT_STRUCTURALLY_SYMMETRIC:
880   case MAT_NOT_SYMMETRIC:
881   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
882   case MAT_HERMITIAN:
883   case MAT_NOT_HERMITIAN:
884   case MAT_SYMMETRY_ETERNAL:
885   case MAT_NOT_SYMMETRY_ETERNAL:
886     break;
887   default:
888     SETERRQ(PETSC_ERR_SUP,"unknown option");
889   }
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatGetRow_SeqBAIJ"
895 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
896 {
897   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
898   PetscErrorCode ierr;
899   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
900   MatScalar    *aa,*aa_i;
901   PetscScalar  *v_i;
902 
903   PetscFunctionBegin;
904   bs  = a->bs;
905   ai  = a->i;
906   aj  = a->j;
907   aa  = a->a;
908   bs2 = a->bs2;
909 
910   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
911 
912   bn  = row/bs;   /* Block number */
913   bp  = row % bs; /* Block Position */
914   M   = ai[bn+1] - ai[bn];
915   *nz = bs*M;
916 
917   if (v) {
918     *v = 0;
919     if (*nz) {
920       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
921       for (i=0; i<M; i++) { /* for each block in the block row */
922         v_i  = *v + i*bs;
923         aa_i = aa + bs2*(ai[bn] + i);
924         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
925       }
926     }
927   }
928 
929   if (idx) {
930     *idx = 0;
931     if (*nz) {
932       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
933       for (i=0; i<M; i++) { /* for each block in the block row */
934         idx_i = *idx + i*bs;
935         itmp  = bs*aj[ai[bn] + i];
936         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
937       }
938     }
939   }
940   PetscFunctionReturn(0);
941 }
942 
943 #undef __FUNCT__
944 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
945 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
946 {
947   PetscErrorCode ierr;
948 
949   PetscFunctionBegin;
950   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
951   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
952   PetscFunctionReturn(0);
953 }
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "MatTranspose_SeqBAIJ"
957 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
958 {
959   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
960   Mat         C;
961   PetscErrorCode ierr;
962   int         i,j,k,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
963   int         *rows,*cols,bs2=a->bs2;
964   PetscScalar *array;
965 
966   PetscFunctionBegin;
967   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
968   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
969   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
970 
971 #if defined(PETSC_USE_MAT_SINGLE)
972   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
973   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
974 #else
975   array = a->a;
976 #endif
977 
978   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
979   ierr = MatCreate(A->comm,A->n,A->m,A->n,A->m,&C);CHKERRQ(ierr);
980   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
981   ierr = MatSeqBAIJSetPreallocation(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
982   ierr = PetscFree(col);CHKERRQ(ierr);
983   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
984   cols = rows + bs;
985   for (i=0; i<mbs; i++) {
986     cols[0] = i*bs;
987     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
988     len = ai[i+1] - ai[i];
989     for (j=0; j<len; j++) {
990       rows[0] = (*aj++)*bs;
991       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
992       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
993       array += bs2;
994     }
995   }
996   ierr = PetscFree(rows);CHKERRQ(ierr);
997 #if defined(PETSC_USE_MAT_SINGLE)
998   ierr = PetscFree(array);
999 #endif
1000 
1001   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1002   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1003 
1004   if (B) {
1005     *B = C;
1006   } else {
1007     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1008   }
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1014 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1015 {
1016   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1017   PetscErrorCode ierr;
1018   int         i,fd,*col_lens,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1019   PetscScalar *aa;
1020   FILE        *file;
1021 
1022   PetscFunctionBegin;
1023   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1024   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
1025   col_lens[0] = MAT_FILE_COOKIE;
1026 
1027   col_lens[1] = A->m;
1028   col_lens[2] = A->n;
1029   col_lens[3] = a->nz*bs2;
1030 
1031   /* store lengths of each row and write (including header) to file */
1032   count = 0;
1033   for (i=0; i<a->mbs; i++) {
1034     for (j=0; j<bs; j++) {
1035       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1036     }
1037   }
1038   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
1039   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1040 
1041   /* store column indices (zero start index) */
1042   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
1043   count = 0;
1044   for (i=0; i<a->mbs; i++) {
1045     for (j=0; j<bs; j++) {
1046       for (k=a->i[i]; k<a->i[i+1]; k++) {
1047         for (l=0; l<bs; l++) {
1048           jj[count++] = bs*a->j[k] + l;
1049         }
1050       }
1051     }
1052   }
1053   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
1054   ierr = PetscFree(jj);CHKERRQ(ierr);
1055 
1056   /* store nonzero values */
1057   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1058   count = 0;
1059   for (i=0; i<a->mbs; i++) {
1060     for (j=0; j<bs; j++) {
1061       for (k=a->i[i]; k<a->i[i+1]; k++) {
1062         for (l=0; l<bs; l++) {
1063           aa[count++] = a->a[bs2*k + l*bs + j];
1064         }
1065       }
1066     }
1067   }
1068   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
1069   ierr = PetscFree(aa);CHKERRQ(ierr);
1070 
1071   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1072   if (file) {
1073     fprintf(file,"-matload_block_size %d\n",a->bs);
1074   }
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1080 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1081 {
1082   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1083   PetscErrorCode ierr;
1084   int               i,j,bs = a->bs,k,l,bs2=a->bs2;
1085   PetscViewerFormat format;
1086 
1087   PetscFunctionBegin;
1088   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1089   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1090     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1091   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1092     Mat aij;
1093     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1094     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1095     ierr = MatDestroy(aij);CHKERRQ(ierr);
1096   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1097      PetscFunctionReturn(0);
1098   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1099     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1100     for (i=0; i<a->mbs; i++) {
1101       for (j=0; j<bs; j++) {
1102         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1103         for (k=a->i[i]; k<a->i[i+1]; k++) {
1104           for (l=0; l<bs; l++) {
1105 #if defined(PETSC_USE_COMPLEX)
1106             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1107               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1108                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1109             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1110               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1111                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1112             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1113               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1114             }
1115 #else
1116             if (a->a[bs2*k + l*bs + j] != 0.0) {
1117               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1118             }
1119 #endif
1120           }
1121         }
1122         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1123       }
1124     }
1125     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1126   } else {
1127     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1128     for (i=0; i<a->mbs; i++) {
1129       for (j=0; j<bs; j++) {
1130         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1131         for (k=a->i[i]; k<a->i[i+1]; k++) {
1132           for (l=0; l<bs; l++) {
1133 #if defined(PETSC_USE_COMPLEX)
1134             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1135               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1136                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1137             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1138               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1139                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1140             } else {
1141               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1142             }
1143 #else
1144             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1145 #endif
1146           }
1147         }
1148         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1149       }
1150     }
1151     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1152   }
1153   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1159 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1160 {
1161   Mat          A = (Mat) Aa;
1162   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1163   PetscErrorCode ierr;
1164   int          row,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1165   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1166   MatScalar    *aa;
1167   PetscViewer  viewer;
1168 
1169   PetscFunctionBegin;
1170 
1171   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1172   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1173 
1174   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1175 
1176   /* loop over matrix elements drawing boxes */
1177   color = PETSC_DRAW_BLUE;
1178   for (i=0,row=0; i<mbs; i++,row+=bs) {
1179     for (j=a->i[i]; j<a->i[i+1]; j++) {
1180       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1181       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1182       aa = a->a + j*bs2;
1183       for (k=0; k<bs; k++) {
1184         for (l=0; l<bs; l++) {
1185           if (PetscRealPart(*aa++) >=  0.) continue;
1186           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1187         }
1188       }
1189     }
1190   }
1191   color = PETSC_DRAW_CYAN;
1192   for (i=0,row=0; i<mbs; i++,row+=bs) {
1193     for (j=a->i[i]; j<a->i[i+1]; j++) {
1194       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1195       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1196       aa = a->a + j*bs2;
1197       for (k=0; k<bs; k++) {
1198         for (l=0; l<bs; l++) {
1199           if (PetscRealPart(*aa++) != 0.) continue;
1200           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1201         }
1202       }
1203     }
1204   }
1205 
1206   color = PETSC_DRAW_RED;
1207   for (i=0,row=0; i<mbs; i++,row+=bs) {
1208     for (j=a->i[i]; j<a->i[i+1]; j++) {
1209       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1210       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1211       aa = a->a + j*bs2;
1212       for (k=0; k<bs; k++) {
1213         for (l=0; l<bs; l++) {
1214           if (PetscRealPart(*aa++) <= 0.) continue;
1215           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1216         }
1217       }
1218     }
1219   }
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 #undef __FUNCT__
1224 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1225 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1226 {
1227   PetscErrorCode ierr;
1228   PetscReal    xl,yl,xr,yr,w,h;
1229   PetscDraw    draw;
1230   PetscTruth   isnull;
1231 
1232   PetscFunctionBegin;
1233 
1234   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1235   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1236 
1237   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1238   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1239   xr += w;    yr += h;  xl = -w;     yl = -h;
1240   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1241   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1242   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatView_SeqBAIJ"
1248 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1249 {
1250   PetscErrorCode ierr;
1251   PetscTruth iascii,isbinary,isdraw;
1252 
1253   PetscFunctionBegin;
1254   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1255   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1256   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1257   if (iascii){
1258     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1259   } else if (isbinary) {
1260     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1261   } else if (isdraw) {
1262     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1263   } else {
1264     Mat B;
1265     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1266     ierr = MatView(B,viewer);CHKERRQ(ierr);
1267     ierr = MatDestroy(B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 
1273 #undef __FUNCT__
1274 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1275 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1276 {
1277   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1278   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1279   int        *ai = a->i,*ailen = a->ilen;
1280   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1281   MatScalar  *ap,*aa = a->a,zero = 0.0;
1282 
1283   PetscFunctionBegin;
1284   for (k=0; k<m; k++) { /* loop over rows */
1285     row  = im[k]; brow = row/bs;
1286     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1287     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1288     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1289     nrow = ailen[brow];
1290     for (l=0; l<n; l++) { /* loop over columns */
1291       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1292       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1293       col  = in[l] ;
1294       bcol = col/bs;
1295       cidx = col%bs;
1296       ridx = row%bs;
1297       high = nrow;
1298       low  = 0; /* assume unsorted */
1299       while (high-low > 5) {
1300         t = (low+high)/2;
1301         if (rp[t] > bcol) high = t;
1302         else             low  = t;
1303       }
1304       for (i=low; i<high; i++) {
1305         if (rp[i] > bcol) break;
1306         if (rp[i] == bcol) {
1307           *v++ = ap[bs2*i+bs*cidx+ridx];
1308           goto finished;
1309         }
1310       }
1311       *v++ = zero;
1312       finished:;
1313     }
1314   }
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #if defined(PETSC_USE_MAT_SINGLE)
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1321 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1322 {
1323   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1324   PetscErrorCode ierr;
1325   int         i,N = m*n*b->bs2;
1326   MatScalar   *vsingle;
1327 
1328   PetscFunctionBegin;
1329   if (N > b->setvalueslen) {
1330     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1331     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1332     b->setvalueslen  = N;
1333   }
1334   vsingle = b->setvaluescopy;
1335   for (i=0; i<N; i++) {
1336     vsingle[i] = v[i];
1337   }
1338   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 #endif
1342 
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1346 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1347 {
1348   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1349   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1350   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1351   PetscErrorCode ierr;
1352   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval;
1353   PetscTruth        roworiented=a->roworiented;
1354   const MatScalar   *value = v;
1355   MatScalar         *ap,*aa = a->a,*bap;
1356 
1357   PetscFunctionBegin;
1358   if (roworiented) {
1359     stepval = (n-1)*bs;
1360   } else {
1361     stepval = (m-1)*bs;
1362   }
1363   for (k=0; k<m; k++) { /* loop over added rows */
1364     row  = im[k];
1365     if (row < 0) continue;
1366 #if defined(PETSC_USE_BOPT_g)
1367     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1368 #endif
1369     rp   = aj + ai[row];
1370     ap   = aa + bs2*ai[row];
1371     rmax = imax[row];
1372     nrow = ailen[row];
1373     low  = 0;
1374     for (l=0; l<n; l++) { /* loop over added columns */
1375       if (in[l] < 0) continue;
1376 #if defined(PETSC_USE_BOPT_g)
1377       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1378 #endif
1379       col = in[l];
1380       if (roworiented) {
1381         value = v + k*(stepval+bs)*bs + l*bs;
1382       } else {
1383         value = v + l*(stepval+bs)*bs + k*bs;
1384       }
1385       if (!sorted) low = 0; high = nrow;
1386       while (high-low > 7) {
1387         t = (low+high)/2;
1388         if (rp[t] > col) high = t;
1389         else             low  = t;
1390       }
1391       for (i=low; i<high; i++) {
1392         if (rp[i] > col) break;
1393         if (rp[i] == col) {
1394           bap  = ap +  bs2*i;
1395           if (roworiented) {
1396             if (is == ADD_VALUES) {
1397               for (ii=0; ii<bs; ii++,value+=stepval) {
1398                 for (jj=ii; jj<bs2; jj+=bs) {
1399                   bap[jj] += *value++;
1400                 }
1401               }
1402             } else {
1403               for (ii=0; ii<bs; ii++,value+=stepval) {
1404                 for (jj=ii; jj<bs2; jj+=bs) {
1405                   bap[jj] = *value++;
1406                 }
1407               }
1408             }
1409           } else {
1410             if (is == ADD_VALUES) {
1411               for (ii=0; ii<bs; ii++,value+=stepval) {
1412                 for (jj=0; jj<bs; jj++) {
1413                   *bap++ += *value++;
1414                 }
1415               }
1416             } else {
1417               for (ii=0; ii<bs; ii++,value+=stepval) {
1418                 for (jj=0; jj<bs; jj++) {
1419                   *bap++  = *value++;
1420                 }
1421               }
1422             }
1423           }
1424           goto noinsert2;
1425         }
1426       }
1427       if (nonew == 1) goto noinsert2;
1428       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1429       if (nrow >= rmax) {
1430         /* there is no extra room in row, therefore enlarge */
1431         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1432         MatScalar *new_a;
1433 
1434         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1435 
1436         /* malloc new storage space */
1437         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1438 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1439         new_j   = (int*)(new_a + bs2*new_nz);
1440         new_i   = new_j + new_nz;
1441 
1442         /* copy over old data into new slots */
1443         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1444         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1445         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
1446         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1447         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
1448         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1449         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1450         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1451         /* free up old matrix storage */
1452         ierr = PetscFree(a->a);CHKERRQ(ierr);
1453         if (!a->singlemalloc) {
1454           ierr = PetscFree(a->i);CHKERRQ(ierr);
1455           ierr = PetscFree(a->j);CHKERRQ(ierr);
1456         }
1457         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1458         a->singlemalloc = PETSC_TRUE;
1459 
1460         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1461         rmax = imax[row] = imax[row] + CHUNKSIZE;
1462         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1463         a->maxnz += bs2*CHUNKSIZE;
1464         a->reallocs++;
1465         a->nz++;
1466       }
1467       N = nrow++ - 1;
1468       /* shift up all the later entries in this row */
1469       for (ii=N; ii>=i; ii--) {
1470         rp[ii+1] = rp[ii];
1471         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1472       }
1473       if (N >= i) {
1474         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1475       }
1476       rp[i] = col;
1477       bap   = ap +  bs2*i;
1478       if (roworiented) {
1479         for (ii=0; ii<bs; ii++,value+=stepval) {
1480           for (jj=ii; jj<bs2; jj+=bs) {
1481             bap[jj] = *value++;
1482           }
1483         }
1484       } else {
1485         for (ii=0; ii<bs; ii++,value+=stepval) {
1486           for (jj=0; jj<bs; jj++) {
1487             *bap++  = *value++;
1488           }
1489         }
1490       }
1491       noinsert2:;
1492       low = i;
1493     }
1494     ailen[row] = nrow;
1495   }
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1501 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1502 {
1503   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1504   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1505   int        m = A->m,*ip,N,*ailen = a->ilen;
1506   PetscErrorCode ierr;
1507   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1508   MatScalar  *aa = a->a,*ap;
1509 
1510   PetscFunctionBegin;
1511   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1512 
1513   if (m) rmax = ailen[0];
1514   for (i=1; i<mbs; i++) {
1515     /* move each row back by the amount of empty slots (fshift) before it*/
1516     fshift += imax[i-1] - ailen[i-1];
1517     rmax   = PetscMax(rmax,ailen[i]);
1518     if (fshift) {
1519       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1520       N = ailen[i];
1521       for (j=0; j<N; j++) {
1522         ip[j-fshift] = ip[j];
1523         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1524       }
1525     }
1526     ai[i] = ai[i-1] + ailen[i-1];
1527   }
1528   if (mbs) {
1529     fshift += imax[mbs-1] - ailen[mbs-1];
1530     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1531   }
1532   /* reset ilen and imax for each row */
1533   for (i=0; i<mbs; i++) {
1534     ailen[i] = imax[i] = ai[i+1] - ai[i];
1535   }
1536   a->nz = ai[mbs];
1537 
1538   /* diagonals may have moved, so kill the diagonal pointers */
1539   a->idiagvalid = PETSC_FALSE;
1540   if (fshift && a->diag) {
1541     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1542     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1543     a->diag = 0;
1544   }
1545   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1546   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1547   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1548   a->reallocs          = 0;
1549   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1550 
1551   PetscFunctionReturn(0);
1552 }
1553 
1554 
1555 
1556 /*
1557    This function returns an array of flags which indicate the locations of contiguous
1558    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1559    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1560    Assume: sizes should be long enough to hold all the values.
1561 */
1562 #undef __FUNCT__
1563 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1564 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1565 {
1566   int        i,j,k,row;
1567   PetscTruth flg;
1568 
1569   PetscFunctionBegin;
1570   for (i=0,j=0; i<n; j++) {
1571     row = idx[i];
1572     if (row%bs!=0) { /* Not the begining of a block */
1573       sizes[j] = 1;
1574       i++;
1575     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1576       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1577       i++;
1578     } else { /* Begining of the block, so check if the complete block exists */
1579       flg = PETSC_TRUE;
1580       for (k=1; k<bs; k++) {
1581         if (row+k != idx[i+k]) { /* break in the block */
1582           flg = PETSC_FALSE;
1583           break;
1584         }
1585       }
1586       if (flg == PETSC_TRUE) { /* No break in the bs */
1587         sizes[j] = bs;
1588         i+= bs;
1589       } else {
1590         sizes[j] = 1;
1591         i++;
1592       }
1593     }
1594   }
1595   *bs_max = j;
1596   PetscFunctionReturn(0);
1597 }
1598 
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1601 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1602 {
1603   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1604   PetscErrorCode ierr;
1605   int         i,j,k,count,is_n,*is_idx,*rows;
1606   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1607   PetscScalar zero = 0.0;
1608   MatScalar   *aa;
1609 
1610   PetscFunctionBegin;
1611   /* Make a copy of the IS and  sort it */
1612   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1613   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1614 
1615   /* allocate memory for rows,sizes */
1616   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1617   sizes = rows + is_n;
1618 
1619   /* copy IS values to rows, and sort them */
1620   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1621   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1622   if (baij->keepzeroedrows) {
1623     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1624     bs_max = is_n;
1625   } else {
1626     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1627   }
1628   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1629 
1630   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1631     row   = rows[j];
1632     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1633     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1634     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1635     if (sizes[i] == bs && !baij->keepzeroedrows) {
1636       if (diag) {
1637         if (baij->ilen[row/bs] > 0) {
1638           baij->ilen[row/bs]       = 1;
1639           baij->j[baij->i[row/bs]] = row/bs;
1640           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1641         }
1642         /* Now insert all the diagonal values for this bs */
1643         for (k=0; k<bs; k++) {
1644           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1645         }
1646       } else { /* (!diag) */
1647         baij->ilen[row/bs] = 0;
1648       } /* end (!diag) */
1649     } else { /* (sizes[i] != bs) */
1650 #if defined (PETSC_USE_DEBUG)
1651       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1652 #endif
1653       for (k=0; k<count; k++) {
1654         aa[0] =  zero;
1655         aa    += bs;
1656       }
1657       if (diag) {
1658         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1659       }
1660     }
1661   }
1662 
1663   ierr = PetscFree(rows);CHKERRQ(ierr);
1664   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1665   PetscFunctionReturn(0);
1666 }
1667 
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1670 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1671 {
1672   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1673   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1674   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1675   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1676   PetscErrorCode ierr;
1677   int         ridx,cidx,bs2=a->bs2;
1678   PetscTruth  roworiented=a->roworiented;
1679   MatScalar   *ap,value,*aa=a->a,*bap;
1680 
1681   PetscFunctionBegin;
1682   for (k=0; k<m; k++) { /* loop over added rows */
1683     row  = im[k]; brow = row/bs;
1684     if (row < 0) continue;
1685 #if defined(PETSC_USE_BOPT_g)
1686     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1687 #endif
1688     rp   = aj + ai[brow];
1689     ap   = aa + bs2*ai[brow];
1690     rmax = imax[brow];
1691     nrow = ailen[brow];
1692     low  = 0;
1693     for (l=0; l<n; l++) { /* loop over added columns */
1694       if (in[l] < 0) continue;
1695 #if defined(PETSC_USE_BOPT_g)
1696       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1697 #endif
1698       col = in[l]; bcol = col/bs;
1699       ridx = row % bs; cidx = col % bs;
1700       if (roworiented) {
1701         value = v[l + k*n];
1702       } else {
1703         value = v[k + l*m];
1704       }
1705       if (!sorted) low = 0; high = nrow;
1706       while (high-low > 7) {
1707         t = (low+high)/2;
1708         if (rp[t] > bcol) high = t;
1709         else              low  = t;
1710       }
1711       for (i=low; i<high; i++) {
1712         if (rp[i] > bcol) break;
1713         if (rp[i] == bcol) {
1714           bap  = ap +  bs2*i + bs*cidx + ridx;
1715           if (is == ADD_VALUES) *bap += value;
1716           else                  *bap  = value;
1717           goto noinsert1;
1718         }
1719       }
1720       if (nonew == 1) goto noinsert1;
1721       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1722       if (nrow >= rmax) {
1723         /* there is no extra room in row, therefore enlarge */
1724         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1725         MatScalar *new_a;
1726 
1727         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1728 
1729         /* Malloc new storage space */
1730         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1731 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1732         new_j   = (int*)(new_a + bs2*new_nz);
1733         new_i   = new_j + new_nz;
1734 
1735         /* copy over old data into new slots */
1736         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1737         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1738         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1739         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1740         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1741         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1742         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1743         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1744         /* free up old matrix storage */
1745         ierr = PetscFree(a->a);CHKERRQ(ierr);
1746         if (!a->singlemalloc) {
1747           ierr = PetscFree(a->i);CHKERRQ(ierr);
1748           ierr = PetscFree(a->j);CHKERRQ(ierr);
1749         }
1750         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1751         a->singlemalloc = PETSC_TRUE;
1752 
1753         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1754         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1755         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1756         a->maxnz += bs2*CHUNKSIZE;
1757         a->reallocs++;
1758         a->nz++;
1759       }
1760       N = nrow++ - 1;
1761       /* shift up all the later entries in this row */
1762       for (ii=N; ii>=i; ii--) {
1763         rp[ii+1] = rp[ii];
1764         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1765       }
1766       if (N>=i) {
1767         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1768       }
1769       rp[i]                      = bcol;
1770       ap[bs2*i + bs*cidx + ridx] = value;
1771       noinsert1:;
1772       low = i;
1773     }
1774     ailen[brow] = nrow;
1775   }
1776   PetscFunctionReturn(0);
1777 }
1778 
1779 
1780 #undef __FUNCT__
1781 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1782 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1783 {
1784   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1785   Mat         outA;
1786   PetscErrorCode ierr;
1787   PetscTruth  row_identity,col_identity;
1788 
1789   PetscFunctionBegin;
1790   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1791   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1792   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1793   if (!row_identity || !col_identity) {
1794     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1795   }
1796 
1797   outA          = inA;
1798   inA->factor   = FACTOR_LU;
1799 
1800   if (!a->diag) {
1801     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1802   }
1803 
1804   a->row        = row;
1805   a->col        = col;
1806   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1807   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1808 
1809   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1810   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1811   PetscLogObjectParent(inA,a->icol);
1812 
1813   /*
1814       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1815       for ILU(0) factorization with natural ordering
1816   */
1817   if (a->bs < 8) {
1818     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1819   } else {
1820     if (!a->solve_work) {
1821       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1822       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1823     }
1824   }
1825 
1826   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1827 
1828   PetscFunctionReturn(0);
1829 }
1830 #undef __FUNCT__
1831 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1832 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1833 {
1834   static PetscTruth called = PETSC_FALSE;
1835   MPI_Comm          comm = A->comm;
1836   PetscErrorCode ierr;
1837 
1838   PetscFunctionBegin;
1839   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1840   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1841   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 EXTERN_C_BEGIN
1846 #undef __FUNCT__
1847 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1848 PetscErrorCode MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1849 {
1850   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1851   int         i,nz,nbs;
1852 
1853   PetscFunctionBegin;
1854   nz  = baij->maxnz/baij->bs2;
1855   nbs = baij->nbs;
1856   for (i=0; i<nz; i++) {
1857     baij->j[i] = indices[i];
1858   }
1859   baij->nz = nz;
1860   for (i=0; i<nbs; i++) {
1861     baij->ilen[i] = baij->imax[i];
1862   }
1863 
1864   PetscFunctionReturn(0);
1865 }
1866 EXTERN_C_END
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1870 /*@
1871     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1872        in the matrix.
1873 
1874   Input Parameters:
1875 +  mat - the SeqBAIJ matrix
1876 -  indices - the column indices
1877 
1878   Level: advanced
1879 
1880   Notes:
1881     This can be called if you have precomputed the nonzero structure of the
1882   matrix and want to provide it to the matrix object to improve the performance
1883   of the MatSetValues() operation.
1884 
1885     You MUST have set the correct numbers of nonzeros per row in the call to
1886   MatCreateSeqBAIJ().
1887 
1888     MUST be called before any calls to MatSetValues();
1889 
1890 @*/
1891 PetscErrorCode MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1892 {
1893   PetscErrorCode ierr,(*f)(Mat,int *);
1894 
1895   PetscFunctionBegin;
1896   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1897   PetscValidPointer(indices,2);
1898   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1899   if (f) {
1900     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1901   } else {
1902     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1909 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1910 {
1911   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1912   PetscErrorCode ierr;
1913   int          i,j,n,row,bs,*ai,*aj,mbs;
1914   PetscReal    atmp;
1915   PetscScalar  *x,zero = 0.0;
1916   MatScalar    *aa;
1917   int          ncols,brow,krow,kcol;
1918 
1919   PetscFunctionBegin;
1920   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1921   bs   = a->bs;
1922   aa   = a->a;
1923   ai   = a->i;
1924   aj   = a->j;
1925   mbs = a->mbs;
1926 
1927   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1928   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1929   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1930   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1931   for (i=0; i<mbs; i++) {
1932     ncols = ai[1] - ai[0]; ai++;
1933     brow  = bs*i;
1934     for (j=0; j<ncols; j++){
1935       /* bcol = bs*(*aj); */
1936       for (kcol=0; kcol<bs; kcol++){
1937         for (krow=0; krow<bs; krow++){
1938           atmp = PetscAbsScalar(*aa); aa++;
1939           row = brow + krow;    /* row index */
1940           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1941           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1942         }
1943       }
1944       aj++;
1945     }
1946   }
1947   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1953 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1954 {
1955   PetscErrorCode ierr;
1956 
1957   PetscFunctionBegin;
1958   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1964 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1965 {
1966   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1967   PetscFunctionBegin;
1968   *array = a->a;
1969   PetscFunctionReturn(0);
1970 }
1971 
1972 #undef __FUNCT__
1973 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1974 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1975 {
1976   PetscFunctionBegin;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #include "petscblaslapack.h"
1981 #undef __FUNCT__
1982 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1983 PetscErrorCode MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1984 {
1985   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1986   PetscErrorCode ierr;
1987   int            i,bs=y->bs,j,bs2;
1988   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1989 
1990   PetscFunctionBegin;
1991   if (str == SAME_NONZERO_PATTERN) {
1992     BLaxpy_(&bnz,(PetscScalar*)a,x->a,&one,y->a,&one);
1993   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1994     if (y->xtoy && y->XtoY != X) {
1995       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1996       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1997     }
1998     if (!y->xtoy) { /* get xtoy */
1999       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2000       y->XtoY = X;
2001     }
2002     bs2 = bs*bs;
2003     for (i=0; i<x->nz; i++) {
2004       j = 0;
2005       while (j < bs2){
2006         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
2007         j++;
2008       }
2009     }
2010     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
2011   } else {
2012     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
2013   }
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 /* -------------------------------------------------------------------*/
2018 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2019        MatGetRow_SeqBAIJ,
2020        MatRestoreRow_SeqBAIJ,
2021        MatMult_SeqBAIJ_N,
2022 /* 4*/ MatMultAdd_SeqBAIJ_N,
2023        MatMultTranspose_SeqBAIJ,
2024        MatMultTransposeAdd_SeqBAIJ,
2025        MatSolve_SeqBAIJ_N,
2026        0,
2027        0,
2028 /*10*/ 0,
2029        MatLUFactor_SeqBAIJ,
2030        0,
2031        0,
2032        MatTranspose_SeqBAIJ,
2033 /*15*/ MatGetInfo_SeqBAIJ,
2034        MatEqual_SeqBAIJ,
2035        MatGetDiagonal_SeqBAIJ,
2036        MatDiagonalScale_SeqBAIJ,
2037        MatNorm_SeqBAIJ,
2038 /*20*/ 0,
2039        MatAssemblyEnd_SeqBAIJ,
2040        0,
2041        MatSetOption_SeqBAIJ,
2042        MatZeroEntries_SeqBAIJ,
2043 /*25*/ MatZeroRows_SeqBAIJ,
2044        MatLUFactorSymbolic_SeqBAIJ,
2045        MatLUFactorNumeric_SeqBAIJ_N,
2046        0,
2047        0,
2048 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2049        MatILUFactorSymbolic_SeqBAIJ,
2050        0,
2051        MatGetArray_SeqBAIJ,
2052        MatRestoreArray_SeqBAIJ,
2053 /*35*/ MatDuplicate_SeqBAIJ,
2054        0,
2055        0,
2056        MatILUFactor_SeqBAIJ,
2057        0,
2058 /*40*/ MatAXPY_SeqBAIJ,
2059        MatGetSubMatrices_SeqBAIJ,
2060        MatIncreaseOverlap_SeqBAIJ,
2061        MatGetValues_SeqBAIJ,
2062        0,
2063 /*45*/ MatPrintHelp_SeqBAIJ,
2064        MatScale_SeqBAIJ,
2065        0,
2066        0,
2067        0,
2068 /*50*/ MatGetBlockSize_SeqBAIJ,
2069        MatGetRowIJ_SeqBAIJ,
2070        MatRestoreRowIJ_SeqBAIJ,
2071        0,
2072        0,
2073 /*55*/ 0,
2074        0,
2075        0,
2076        0,
2077        MatSetValuesBlocked_SeqBAIJ,
2078 /*60*/ MatGetSubMatrix_SeqBAIJ,
2079        MatDestroy_SeqBAIJ,
2080        MatView_SeqBAIJ,
2081        MatGetPetscMaps_Petsc,
2082        0,
2083 /*65*/ 0,
2084        0,
2085        0,
2086        0,
2087        0,
2088 /*70*/ MatGetRowMax_SeqBAIJ,
2089        MatConvert_Basic,
2090        0,
2091        0,
2092        0,
2093 /*75*/ 0,
2094        0,
2095        0,
2096        0,
2097        0,
2098 /*80*/ 0,
2099        0,
2100        0,
2101        0,
2102 /*84*/ MatLoad_SeqBAIJ,
2103        0,
2104        0,
2105        0,
2106        0
2107 };
2108 
2109 EXTERN_C_BEGIN
2110 #undef __FUNCT__
2111 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2112 PetscErrorCode MatStoreValues_SeqBAIJ(Mat mat)
2113 {
2114   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2115   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2116   PetscErrorCode ierr;
2117 
2118   PetscFunctionBegin;
2119   if (aij->nonew != 1) {
2120     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2121   }
2122 
2123   /* allocate space for values if not already there */
2124   if (!aij->saved_values) {
2125     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2126   }
2127 
2128   /* copy values over */
2129   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 EXTERN_C_END
2133 
2134 EXTERN_C_BEGIN
2135 #undef __FUNCT__
2136 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2137 PetscErrorCode MatRetrieveValues_SeqBAIJ(Mat mat)
2138 {
2139   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2140   PetscErrorCode ierr;
2141   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2142 
2143   PetscFunctionBegin;
2144   if (aij->nonew != 1) {
2145     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2146   }
2147   if (!aij->saved_values) {
2148     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2149   }
2150 
2151   /* copy values over */
2152   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2153   PetscFunctionReturn(0);
2154 }
2155 EXTERN_C_END
2156 
2157 EXTERN_C_BEGIN
2158 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2159 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2160 EXTERN_C_END
2161 
2162 EXTERN_C_BEGIN
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2165 PetscErrorCode MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2166 {
2167   Mat_SeqBAIJ *b;
2168   PetscErrorCode ierr;
2169   int         i,len,mbs,nbs,bs2,newbs = bs;
2170   PetscTruth  flg;
2171 
2172   PetscFunctionBegin;
2173 
2174   B->preallocated = PETSC_TRUE;
2175   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2176   if (nnz && newbs != bs) {
2177     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2178   }
2179   bs   = newbs;
2180 
2181   mbs  = B->m/bs;
2182   nbs  = B->n/bs;
2183   bs2  = bs*bs;
2184 
2185   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2186     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2187   }
2188 
2189   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2190   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2191   if (nnz) {
2192     for (i=0; i<mbs; i++) {
2193       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2194       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2195     }
2196   }
2197 
2198   b       = (Mat_SeqBAIJ*)B->data;
2199   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2200   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2201   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2202   if (!flg) {
2203     switch (bs) {
2204     case 1:
2205       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2206       B->ops->mult            = MatMult_SeqBAIJ_1;
2207       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2208       break;
2209     case 2:
2210       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2211       B->ops->mult            = MatMult_SeqBAIJ_2;
2212       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2213       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2214       break;
2215     case 3:
2216       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2217       B->ops->mult            = MatMult_SeqBAIJ_3;
2218       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2219       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2220       break;
2221     case 4:
2222       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2223       B->ops->mult            = MatMult_SeqBAIJ_4;
2224       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2225       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2226       break;
2227     case 5:
2228       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2229       B->ops->mult            = MatMult_SeqBAIJ_5;
2230       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2231       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2232       break;
2233     case 6:
2234       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2235       B->ops->mult            = MatMult_SeqBAIJ_6;
2236       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2237       break;
2238     case 7:
2239       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2240       B->ops->mult            = MatMult_SeqBAIJ_7;
2241       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2242       break;
2243     default:
2244       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2245       B->ops->mult            = MatMult_SeqBAIJ_N;
2246       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2247       break;
2248     }
2249   }
2250   b->bs      = bs;
2251   b->mbs     = mbs;
2252   b->nbs     = nbs;
2253   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2254   if (!nnz) {
2255     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2256     else if (nz <= 0)        nz = 1;
2257     for (i=0; i<mbs; i++) b->imax[i] = nz;
2258     nz = nz*mbs;
2259   } else {
2260     nz = 0;
2261     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2262   }
2263 
2264   /* allocate the matrix space */
2265   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2266   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2267   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2268   b->j  = (int*)(b->a + nz*bs2);
2269   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2270   b->i  = b->j + nz;
2271   b->singlemalloc = PETSC_TRUE;
2272 
2273   b->i[0] = 0;
2274   for (i=1; i<mbs+1; i++) {
2275     b->i[i] = b->i[i-1] + b->imax[i-1];
2276   }
2277 
2278   /* b->ilen will count nonzeros in each block row so far. */
2279   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2280   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2281   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2282 
2283   b->bs               = bs;
2284   b->bs2              = bs2;
2285   b->mbs              = mbs;
2286   b->nz               = 0;
2287   b->maxnz            = nz*bs2;
2288   B->info.nz_unneeded = (PetscReal)b->maxnz;
2289   PetscFunctionReturn(0);
2290 }
2291 EXTERN_C_END
2292 
2293 /*MC
2294    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2295    block sparse compressed row format.
2296 
2297    Options Database Keys:
2298 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2299 
2300   Level: beginner
2301 
2302 .seealso: MatCreateSeqBAIJ
2303 M*/
2304 
2305 EXTERN_C_BEGIN
2306 #undef __FUNCT__
2307 #define __FUNCT__ "MatCreate_SeqBAIJ"
2308 PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2309 {
2310   PetscErrorCode ierr;
2311   int         size;
2312   Mat_SeqBAIJ *b;
2313 
2314   PetscFunctionBegin;
2315   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2316   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2317 
2318   B->m = B->M = PetscMax(B->m,B->M);
2319   B->n = B->N = PetscMax(B->n,B->N);
2320   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2321   B->data = (void*)b;
2322   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2323   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2324   B->factor           = 0;
2325   B->lupivotthreshold = 1.0;
2326   B->mapping          = 0;
2327   b->row              = 0;
2328   b->col              = 0;
2329   b->icol             = 0;
2330   b->reallocs         = 0;
2331   b->saved_values     = 0;
2332 #if defined(PETSC_USE_MAT_SINGLE)
2333   b->setvalueslen     = 0;
2334   b->setvaluescopy    = PETSC_NULL;
2335 #endif
2336 
2337   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2338   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2339 
2340   b->sorted           = PETSC_FALSE;
2341   b->roworiented      = PETSC_TRUE;
2342   b->nonew            = 0;
2343   b->diag             = 0;
2344   b->solve_work       = 0;
2345   b->mult_work        = 0;
2346   B->spptr            = 0;
2347   B->info.nz_unneeded = (PetscReal)b->maxnz;
2348   b->keepzeroedrows   = PETSC_FALSE;
2349   b->xtoy              = 0;
2350   b->XtoY              = 0;
2351 
2352   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2353                                      "MatStoreValues_SeqBAIJ",
2354                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2355   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2356                                      "MatRetrieveValues_SeqBAIJ",
2357                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2358   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2359                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2360                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2361   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2362                                      "MatConvert_SeqBAIJ_SeqAIJ",
2363                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2364 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2365                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2366                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2367   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2368                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2369                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2370   PetscFunctionReturn(0);
2371 }
2372 EXTERN_C_END
2373 
2374 #undef __FUNCT__
2375 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2376 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2377 {
2378   Mat         C;
2379   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2380   PetscErrorCode ierr;
2381   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2;
2382 
2383   PetscFunctionBegin;
2384   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2385 
2386   *B = 0;
2387   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2388   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2389   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2390   c    = (Mat_SeqBAIJ*)C->data;
2391 
2392   C->M   = A->M;
2393   C->N   = A->N;
2394   c->bs  = a->bs;
2395   c->bs2 = a->bs2;
2396   c->mbs = a->mbs;
2397   c->nbs = a->nbs;
2398 
2399   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
2400   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
2401   for (i=0; i<mbs; i++) {
2402     c->imax[i] = a->imax[i];
2403     c->ilen[i] = a->ilen[i];
2404   }
2405 
2406   /* allocate the matrix space */
2407   c->singlemalloc = PETSC_TRUE;
2408   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2409   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2410   c->j = (int*)(c->a + nz*bs2);
2411   c->i = c->j + nz;
2412   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
2413   if (mbs > 0) {
2414     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
2415     if (cpvalues == MAT_COPY_VALUES) {
2416       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2417     } else {
2418       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2419     }
2420   }
2421 
2422   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2423   c->sorted      = a->sorted;
2424   c->roworiented = a->roworiented;
2425   c->nonew       = a->nonew;
2426 
2427   if (a->diag) {
2428     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
2429     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2430     for (i=0; i<mbs; i++) {
2431       c->diag[i] = a->diag[i];
2432     }
2433   } else c->diag        = 0;
2434   c->nz                 = a->nz;
2435   c->maxnz              = a->maxnz;
2436   c->solve_work         = 0;
2437   c->mult_work          = 0;
2438   C->preallocated       = PETSC_TRUE;
2439   C->assembled          = PETSC_TRUE;
2440   *B = C;
2441   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatLoad_SeqBAIJ"
2447 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2448 {
2449   Mat_SeqBAIJ  *a;
2450   Mat          B;
2451   PetscErrorCode ierr;
2452   int          i,nz,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2453   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2454   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2455   int          *masked,nmask,tmp,bs2,ishift;
2456   PetscScalar  *aa;
2457   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2458 
2459   PetscFunctionBegin;
2460   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2461   bs2  = bs*bs;
2462 
2463   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2464   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2465   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2466   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2467   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2468   M = header[1]; N = header[2]; nz = header[3];
2469 
2470   if (header[3] < 0) {
2471     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2472   }
2473 
2474   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2475 
2476   /*
2477      This code adds extra rows to make sure the number of rows is
2478     divisible by the blocksize
2479   */
2480   mbs        = M/bs;
2481   extra_rows = bs - M + bs*(mbs);
2482   if (extra_rows == bs) extra_rows = 0;
2483   else                  mbs++;
2484   if (extra_rows) {
2485     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2486   }
2487 
2488   /* read in row lengths */
2489   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2490   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2491   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2492 
2493   /* read in column indices */
2494   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
2495   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2496   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2497 
2498   /* loop over row lengths determining block row lengths */
2499   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
2500   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
2501   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
2502   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
2503   masked   = mask + mbs;
2504   rowcount = 0; nzcount = 0;
2505   for (i=0; i<mbs; i++) {
2506     nmask = 0;
2507     for (j=0; j<bs; j++) {
2508       kmax = rowlengths[rowcount];
2509       for (k=0; k<kmax; k++) {
2510         tmp = jj[nzcount++]/bs;
2511         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2512       }
2513       rowcount++;
2514     }
2515     browlengths[i] += nmask;
2516     /* zero out the mask elements we set */
2517     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2518   }
2519 
2520   /* create our matrix */
2521   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2522   ierr = MatSetType(B,type);CHKERRQ(ierr);
2523   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2524   a = (Mat_SeqBAIJ*)B->data;
2525 
2526   /* set matrix "i" values */
2527   a->i[0] = 0;
2528   for (i=1; i<= mbs; i++) {
2529     a->i[i]      = a->i[i-1] + browlengths[i-1];
2530     a->ilen[i-1] = browlengths[i-1];
2531   }
2532   a->nz         = 0;
2533   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2534 
2535   /* read in nonzero values */
2536   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2537   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2538   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2539 
2540   /* set "a" and "j" values into matrix */
2541   nzcount = 0; jcount = 0;
2542   for (i=0; i<mbs; i++) {
2543     nzcountb = nzcount;
2544     nmask    = 0;
2545     for (j=0; j<bs; j++) {
2546       kmax = rowlengths[i*bs+j];
2547       for (k=0; k<kmax; k++) {
2548         tmp = jj[nzcount++]/bs;
2549 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2550       }
2551     }
2552     /* sort the masked values */
2553     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2554 
2555     /* set "j" values into matrix */
2556     maskcount = 1;
2557     for (j=0; j<nmask; j++) {
2558       a->j[jcount++]  = masked[j];
2559       mask[masked[j]] = maskcount++;
2560     }
2561     /* set "a" values into matrix */
2562     ishift = bs2*a->i[i];
2563     for (j=0; j<bs; j++) {
2564       kmax = rowlengths[i*bs+j];
2565       for (k=0; k<kmax; k++) {
2566         tmp       = jj[nzcountb]/bs ;
2567         block     = mask[tmp] - 1;
2568         point     = jj[nzcountb] - bs*tmp;
2569         idx       = ishift + bs2*block + j + bs*point;
2570         a->a[idx] = (MatScalar)aa[nzcountb++];
2571       }
2572     }
2573     /* zero out the mask elements we set */
2574     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2575   }
2576   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2577 
2578   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2579   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2580   ierr = PetscFree(aa);CHKERRQ(ierr);
2581   ierr = PetscFree(jj);CHKERRQ(ierr);
2582   ierr = PetscFree(mask);CHKERRQ(ierr);
2583 
2584   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2585   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2586   ierr = MatView_Private(B);CHKERRQ(ierr);
2587 
2588   *A = B;
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 #undef __FUNCT__
2593 #define __FUNCT__ "MatCreateSeqBAIJ"
2594 /*@C
2595    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2596    compressed row) format.  For good matrix assembly performance the
2597    user should preallocate the matrix storage by setting the parameter nz
2598    (or the array nnz).  By setting these parameters accurately, performance
2599    during matrix assembly can be increased by more than a factor of 50.
2600 
2601    Collective on MPI_Comm
2602 
2603    Input Parameters:
2604 +  comm - MPI communicator, set to PETSC_COMM_SELF
2605 .  bs - size of block
2606 .  m - number of rows
2607 .  n - number of columns
2608 .  nz - number of nonzero blocks  per block row (same for all rows)
2609 -  nnz - array containing the number of nonzero blocks in the various block rows
2610          (possibly different for each block row) or PETSC_NULL
2611 
2612    Output Parameter:
2613 .  A - the matrix
2614 
2615    Options Database Keys:
2616 .   -mat_no_unroll - uses code that does not unroll the loops in the
2617                      block calculations (much slower)
2618 .    -mat_block_size - size of the blocks to use
2619 
2620    Level: intermediate
2621 
2622    Notes:
2623    A nonzero block is any block that as 1 or more nonzeros in it
2624 
2625    The block AIJ format is fully compatible with standard Fortran 77
2626    storage.  That is, the stored row and column indices can begin at
2627    either one (as in Fortran) or zero.  See the users' manual for details.
2628 
2629    Specify the preallocated storage with either nz or nnz (not both).
2630    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2631    allocation.  For additional details, see the users manual chapter on
2632    matrices.
2633 
2634 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2635 @*/
2636 PetscErrorCode MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2637 {
2638   PetscErrorCode ierr;
2639 
2640   PetscFunctionBegin;
2641   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2642   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2643   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2644   PetscFunctionReturn(0);
2645 }
2646 
2647 #undef __FUNCT__
2648 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2649 /*@C
2650    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2651    per row in the matrix. For good matrix assembly performance the
2652    user should preallocate the matrix storage by setting the parameter nz
2653    (or the array nnz).  By setting these parameters accurately, performance
2654    during matrix assembly can be increased by more than a factor of 50.
2655 
2656    Collective on MPI_Comm
2657 
2658    Input Parameters:
2659 +  A - the matrix
2660 .  bs - size of block
2661 .  nz - number of block nonzeros per block row (same for all rows)
2662 -  nnz - array containing the number of block nonzeros in the various block rows
2663          (possibly different for each block row) or PETSC_NULL
2664 
2665    Options Database Keys:
2666 .   -mat_no_unroll - uses code that does not unroll the loops in the
2667                      block calculations (much slower)
2668 .    -mat_block_size - size of the blocks to use
2669 
2670    Level: intermediate
2671 
2672    Notes:
2673    The block AIJ format is fully compatible with standard Fortran 77
2674    storage.  That is, the stored row and column indices can begin at
2675    either one (as in Fortran) or zero.  See the users' manual for details.
2676 
2677    Specify the preallocated storage with either nz or nnz (not both).
2678    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2679    allocation.  For additional details, see the users manual chapter on
2680    matrices.
2681 
2682 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2683 @*/
2684 PetscErrorCode MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2685 {
2686   PetscErrorCode ierr,(*f)(Mat,int,int,const int[]);
2687 
2688   PetscFunctionBegin;
2689   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2690   if (f) {
2691     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2692   }
2693   PetscFunctionReturn(0);
2694 }
2695 
2696