xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: baij.c,v 1.245 2001/08/07 03:02:55 balay Exp bsmith $*/
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
15 int MatInvertBlockDiagonal_SeqBAIJ(Mat A)
16 {
17   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*) A->data;
18   int         *diag_offset,i,bs = a->bs,mbs = a->mbs,ierr;
19   PetscScalar *v = a->a,*odiag,*diag,*mdiag;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) PetscFunctionReturn(0);
23   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
24   diag_offset = a->diag;
25   if (!a->idiag) {
26     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
27   }
28   diag  = a->idiag;
29   mdiag = a->idiag+bs*bs*mbs;
30   /* factor and invert each block */
31   switch (a->bs){
32     case 2:
33       for (i=0; i<mbs; i++) {
34         odiag   = v + 4*diag_offset[i];
35         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
36 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
37 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
38 	diag    += 4;
39 	mdiag   += 4;
40       }
41       break;
42     case 3:
43       for (i=0; i<mbs; i++) {
44         odiag    = v + 9*diag_offset[i];
45         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
46         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
47         diag[8]  = odiag[8];
48         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
49         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
50         mdiag[8] = odiag[8];
51 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
52 	diag    += 9;
53 	mdiag   += 9;
54       }
55       break;
56     case 4:
57       for (i=0; i<mbs; i++) {
58         odiag  = v + 16*diag_offset[i];
59         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
60         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
61 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
62 	diag  += 16;
63 	mdiag += 16;
64       }
65       break;
66     case 5:
67       for (i=0; i<mbs; i++) {
68         odiag = v + 25*diag_offset[i];
69         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
70         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
71 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
72 	diag  += 25;
73 	mdiag += 25;
74       }
75       break;
76     default:
77       SETERRQ1(1,"not supported for block size %d",a->bs);
78   }
79   a->idiagvalid = PETSC_TRUE;
80   PetscFunctionReturn(0);
81 }
82 
83 #undef __FUNCT__
84 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
85 int MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
86 {
87   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
88   PetscScalar        *x,x1,x2,s1,s2;
89   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
90   int                ierr,m = a->mbs,i,i2,nz,idx;
91   const int          *diag,*ai = a->i,*aj = a->j,*vi;
92 
93   PetscFunctionBegin;
94   its = its*lits;
95   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
96   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
97   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
98   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
99   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
100 
101   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
102 
103   diag  = a->diag;
104   idiag = a->idiag;
105   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
106   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
107 
108   if (flag & SOR_ZERO_INITIAL_GUESS) {
109     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
110       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
111       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
112       i2     = 2;
113       idiag += 4;
114       for (i=1; i<m; i++) {
115 	v     = aa + 4*ai[i];
116 	vi    = aj + ai[i];
117 	nz    = diag[i] - ai[i];
118 	s1    = b[i2]; s2 = b[i2+1];
119 	while (nz--) {
120 	  idx  = 2*(*vi++);
121 	  x1   = x[idx]; x2 = x[1+idx];
122 	  s1  -= v[0]*x1 + v[2]*x2;
123 	  s2  -= v[1]*x1 + v[3]*x2;
124 	  v   += 4;
125 	}
126 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
127 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
128         idiag   += 4;
129         i2      += 2;
130       }
131       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
132       PetscLogFlops(4*(a->nz));
133     }
134     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
135         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
136       i2    = 0;
137       mdiag = a->idiag+4*a->mbs;
138       for (i=0; i<m; i++) {
139         x1      = x[i2]; x2 = x[i2+1];
140         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
141         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
142         mdiag  += 4;
143         i2     += 2;
144       }
145       PetscLogFlops(6*m);
146     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
147       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
148     }
149     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
150       idiag   = a->idiag+4*a->mbs - 4;
151       i2      = 2*m - 2;
152       x1      = x[i2]; x2 = x[i2+1];
153       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
154       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
155       idiag -= 4;
156       i2    -= 2;
157       for (i=m-2; i>=0; i--) {
158 	v     = aa + 4*(diag[i]+1);
159 	vi    = aj + diag[i] + 1;
160 	nz    = ai[i+1] - diag[i] - 1;
161 	s1    = x[i2]; s2 = x[i2+1];
162 	while (nz--) {
163 	  idx  = 2*(*vi++);
164 	  x1   = x[idx]; x2 = x[1+idx];
165 	  s1  -= v[0]*x1 + v[2]*x2;
166 	  s2  -= v[1]*x1 + v[3]*x2;
167 	  v   += 4;
168 	}
169 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
170 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
171         idiag   -= 4;
172         i2      -= 2;
173       }
174       PetscLogFlops(4*(a->nz));
175     }
176   } else {
177     SETERRQ(1,"Only supports point block SOR with zero initial guess");
178   }
179   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
180   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
181   PetscFunctionReturn(0);
182 }
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
186 int MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
189   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
190   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
191   int                ierr,m = a->mbs,i,i2,nz,idx;
192   const int          *diag,*ai = a->i,*aj = a->j,*vi;
193 
194   PetscFunctionBegin;
195   its = its*lits;
196   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
197   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
198   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
199   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
200   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
201 
202   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
203 
204   diag  = a->diag;
205   idiag = a->idiag;
206   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
207   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
208 
209   if (flag & SOR_ZERO_INITIAL_GUESS) {
210     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
211       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
212       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
213       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
214       i2     = 3;
215       idiag += 9;
216       for (i=1; i<m; i++) {
217 	v     = aa + 9*ai[i];
218 	vi    = aj + ai[i];
219 	nz    = diag[i] - ai[i];
220 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
221 	while (nz--) {
222 	  idx  = 3*(*vi++);
223 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
224 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
225 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
226 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
227 	  v   += 9;
228 	}
229 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
230 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
231 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
232         idiag   += 9;
233         i2      += 3;
234       }
235       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
236       PetscLogFlops(9*(a->nz));
237     }
238     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
239         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
240       i2    = 0;
241       mdiag = a->idiag+9*a->mbs;
242       for (i=0; i<m; i++) {
243         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
244         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
245         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
246         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
247         mdiag  += 9;
248         i2     += 3;
249       }
250       PetscLogFlops(15*m);
251     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
252       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
253     }
254     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
255       idiag   = a->idiag+9*a->mbs - 9;
256       i2      = 3*m - 3;
257       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
258       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
259       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
260       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
261       idiag -= 9;
262       i2    -= 3;
263       for (i=m-2; i>=0; i--) {
264 	v     = aa + 9*(diag[i]+1);
265 	vi    = aj + diag[i] + 1;
266 	nz    = ai[i+1] - diag[i] - 1;
267 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
268 	while (nz--) {
269 	  idx  = 3*(*vi++);
270 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
271 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
272 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
273 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
274 	  v   += 9;
275 	}
276 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
277 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
278 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
279         idiag   -= 9;
280         i2      -= 3;
281       }
282       PetscLogFlops(9*(a->nz));
283     }
284   } else {
285     SETERRQ(1,"Only supports point block SOR with zero initial guess");
286   }
287   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
288   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
294 int MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
295 {
296   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
297   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
298   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
299   int                ierr,m = a->mbs,i,i2,nz,idx;
300   const int          *diag,*ai = a->i,*aj = a->j,*vi;
301 
302   PetscFunctionBegin;
303   its = its*lits;
304   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
305   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
306   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
307   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
308   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
309 
310   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
311 
312   diag  = a->diag;
313   idiag = a->idiag;
314   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
315   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
316 
317   if (flag & SOR_ZERO_INITIAL_GUESS) {
318     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
319       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
320       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
321       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
322       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
323       i2     = 4;
324       idiag += 16;
325       for (i=1; i<m; i++) {
326 	v     = aa + 16*ai[i];
327 	vi    = aj + ai[i];
328 	nz    = diag[i] - ai[i];
329 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
330 	while (nz--) {
331 	  idx  = 4*(*vi++);
332 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
333 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
334 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
335 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
336 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
337 	  v   += 16;
338 	}
339 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
340 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
341 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
342 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
343         idiag   += 16;
344         i2      += 4;
345       }
346       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
347       PetscLogFlops(16*(a->nz));
348     }
349     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
350         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
351       i2    = 0;
352       mdiag = a->idiag+16*a->mbs;
353       for (i=0; i<m; i++) {
354         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
355         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
356         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
357         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
358         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
359         mdiag  += 16;
360         i2     += 4;
361       }
362       PetscLogFlops(28*m);
363     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
364       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
365     }
366     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
367       idiag   = a->idiag+16*a->mbs - 16;
368       i2      = 4*m - 4;
369       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
370       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
371       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
372       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
373       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
374       idiag -= 16;
375       i2    -= 4;
376       for (i=m-2; i>=0; i--) {
377 	v     = aa + 16*(diag[i]+1);
378 	vi    = aj + diag[i] + 1;
379 	nz    = ai[i+1] - diag[i] - 1;
380 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
381 	while (nz--) {
382 	  idx  = 4*(*vi++);
383 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
384 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
385 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
386 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
387 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
388 	  v   += 16;
389 	}
390 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
391 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
392 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
393 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
394         idiag   -= 16;
395         i2      -= 4;
396       }
397       PetscLogFlops(16*(a->nz));
398     }
399   } else {
400     SETERRQ(1,"Only supports point block SOR with zero initial guess");
401   }
402   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
403   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
404   PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
409 int MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
410 {
411   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
412   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
413   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
414   int                ierr,m = a->mbs,i,i2,nz,idx;
415   const int          *diag,*ai = a->i,*aj = a->j,*vi;
416 
417   PetscFunctionBegin;
418   its = its*lits;
419   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
420   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
421   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
422   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
423   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
424 
425   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
426 
427   diag  = a->diag;
428   idiag = a->idiag;
429   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
430   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
431 
432   if (flag & SOR_ZERO_INITIAL_GUESS) {
433     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
434       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
435       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
436       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
437       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
438       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
439       i2     = 5;
440       idiag += 25;
441       for (i=1; i<m; i++) {
442 	v     = aa + 25*ai[i];
443 	vi    = aj + ai[i];
444 	nz    = diag[i] - ai[i];
445 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
446 	while (nz--) {
447 	  idx  = 5*(*vi++);
448 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
449 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
450 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
451 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
452 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
453 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
454 	  v   += 25;
455 	}
456 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
457 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
458 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
459 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
460 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
461         idiag   += 25;
462         i2      += 5;
463       }
464       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
465       PetscLogFlops(25*(a->nz));
466     }
467     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
468         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
469       i2    = 0;
470       mdiag = a->idiag+25*a->mbs;
471       for (i=0; i<m; i++) {
472         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
473         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
474         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
475         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
476         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
477         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
478         mdiag  += 25;
479         i2     += 5;
480       }
481       PetscLogFlops(45*m);
482     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
483       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
484     }
485     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
486       idiag   = a->idiag+25*a->mbs - 25;
487       i2      = 5*m - 5;
488       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
489       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
490       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
491       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
492       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
493       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
494       idiag -= 25;
495       i2    -= 5;
496       for (i=m-2; i>=0; i--) {
497 	v     = aa + 25*(diag[i]+1);
498 	vi    = aj + diag[i] + 1;
499 	nz    = ai[i+1] - diag[i] - 1;
500 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
501 	while (nz--) {
502 	  idx  = 5*(*vi++);
503 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
504 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
505 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
506 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
507 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
508 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
509 	  v   += 25;
510 	}
511 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
512 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
513 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
514 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
515 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
516         idiag   -= 25;
517         i2      -= 5;
518       }
519       PetscLogFlops(25*(a->nz));
520     }
521   } else {
522     SETERRQ(1,"Only supports point block SOR with zero initial guess");
523   }
524   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
525   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
526   PetscFunctionReturn(0);
527 }
528 
529 /*
530     Special version for Fun3d sequential benchmark
531 */
532 #if defined(PETSC_HAVE_FORTRAN_CAPS)
533 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
534 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
535 #define matsetvaluesblocked4_ matsetvaluesblocked4
536 #endif
537 
538 EXTERN_C_BEGIN
539 #undef __FUNCT__
540 #define __FUNCT__ "matsetvaluesblocked4_"
541 void matsetvaluesblocked4_(Mat *AA,int *mm,const int im[],int *nn,const int in[],const PetscScalar v[])
542 {
543   Mat               A = *AA;
544   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
545   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
546   int               *ai=a->i,*ailen=a->ilen;
547   int               *aj=a->j,stepval;
548   const PetscScalar *value = v;
549   MatScalar         *ap,*aa = a->a,*bap;
550 
551   PetscFunctionBegin;
552   stepval = (n-1)*4;
553   for (k=0; k<m; k++) { /* loop over added rows */
554     row  = im[k];
555     rp   = aj + ai[row];
556     ap   = aa + 16*ai[row];
557     nrow = ailen[row];
558     low  = 0;
559     for (l=0; l<n; l++) { /* loop over added columns */
560       col = in[l];
561       value = v + k*(stepval+4)*4 + l*4;
562       low = 0; high = nrow;
563       while (high-low > 7) {
564         t = (low+high)/2;
565         if (rp[t] > col) high = t;
566         else             low  = t;
567       }
568       for (i=low; i<high; i++) {
569         if (rp[i] > col) break;
570         if (rp[i] == col) {
571           bap  = ap +  16*i;
572           for (ii=0; ii<4; ii++,value+=stepval) {
573             for (jj=ii; jj<16; jj+=4) {
574               bap[jj] += *value++;
575             }
576           }
577           goto noinsert2;
578         }
579       }
580       N = nrow++ - 1;
581       /* shift up all the later entries in this row */
582       for (ii=N; ii>=i; ii--) {
583         rp[ii+1] = rp[ii];
584         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
585       }
586       if (N >= i) {
587         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
588       }
589       rp[i] = col;
590       bap   = ap +  16*i;
591       for (ii=0; ii<4; ii++,value+=stepval) {
592         for (jj=ii; jj<16; jj+=4) {
593           bap[jj] = *value++;
594         }
595       }
596       noinsert2:;
597       low = i;
598     }
599     ailen[row] = nrow;
600   }
601 }
602 EXTERN_C_END
603 
604 #if defined(PETSC_HAVE_FORTRAN_CAPS)
605 #define matsetvalues4_ MATSETVALUES4
606 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
607 #define matsetvalues4_ matsetvalues4
608 #endif
609 
610 EXTERN_C_BEGIN
611 #undef __FUNCT__
612 #define __FUNCT__ "MatSetValues4_"
613 void matsetvalues4_(Mat *AA,int *mm,int *im,int *nn,int *in,PetscScalar *v)
614 {
615   Mat         A = *AA;
616   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
617   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
618   int         *ai=a->i,*ailen=a->ilen;
619   int         *aj=a->j,brow,bcol;
620   int         ridx,cidx;
621   MatScalar   *ap,value,*aa=a->a,*bap;
622 
623   PetscFunctionBegin;
624   for (k=0; k<m; k++) { /* loop over added rows */
625     row  = im[k]; brow = row/4;
626     rp   = aj + ai[brow];
627     ap   = aa + 16*ai[brow];
628     nrow = ailen[brow];
629     low  = 0;
630     for (l=0; l<n; l++) { /* loop over added columns */
631       col = in[l]; bcol = col/4;
632       ridx = row % 4; cidx = col % 4;
633       value = v[l + k*n];
634       low = 0; high = nrow;
635       while (high-low > 7) {
636         t = (low+high)/2;
637         if (rp[t] > bcol) high = t;
638         else              low  = t;
639       }
640       for (i=low; i<high; i++) {
641         if (rp[i] > bcol) break;
642         if (rp[i] == bcol) {
643           bap  = ap +  16*i + 4*cidx + ridx;
644           *bap += value;
645           goto noinsert1;
646         }
647       }
648       N = nrow++ - 1;
649       /* shift up all the later entries in this row */
650       for (ii=N; ii>=i; ii--) {
651         rp[ii+1] = rp[ii];
652         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
653       }
654       if (N>=i) {
655         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
656       }
657       rp[i]                    = bcol;
658       ap[16*i + 4*cidx + ridx] = value;
659       noinsert1:;
660       low = i;
661     }
662     ailen[brow] = nrow;
663   }
664 }
665 EXTERN_C_END
666 
667 /*  UGLY, ugly, ugly
668    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
669    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
670    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
671    converts the entries into single precision and then calls ..._MatScalar() to put them
672    into the single precision data structures.
673 */
674 #if defined(PETSC_USE_MAT_SINGLE)
675 EXTERN int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
676 #else
677 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
678 #endif
679 
680 #define CHUNKSIZE  10
681 
682 /*
683      Checks for missing diagonals
684 */
685 #undef __FUNCT__
686 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
687 int MatMissingDiagonal_SeqBAIJ(Mat A)
688 {
689   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
690   int         *diag,*jj = a->j,i,ierr;
691 
692   PetscFunctionBegin;
693   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
694   diag = a->diag;
695   for (i=0; i<a->mbs; i++) {
696     if (jj[diag[i]] != i) {
697       SETERRQ1(1,"Matrix is missing diagonal number %d",i);
698     }
699   }
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
705 int MatMarkDiagonal_SeqBAIJ(Mat A)
706 {
707   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
708   int         i,j,*diag,m = a->mbs,ierr;
709 
710   PetscFunctionBegin;
711   if (a->diag) PetscFunctionReturn(0);
712 
713   ierr = PetscMalloc((m+1)*sizeof(int),&diag);CHKERRQ(ierr);
714   PetscLogObjectMemory(A,(m+1)*sizeof(int));
715   for (i=0; i<m; i++) {
716     diag[i] = a->i[i+1];
717     for (j=a->i[i]; j<a->i[i+1]; j++) {
718       if (a->j[j] == i) {
719         diag[i] = j;
720         break;
721       }
722     }
723   }
724   a->diag = diag;
725   PetscFunctionReturn(0);
726 }
727 
728 
729 EXTERN int MatToSymmetricIJ_SeqAIJ(int,int*,int*,int,int,int**,int**);
730 
731 #undef __FUNCT__
732 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
733 static int MatGetRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
734 {
735   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
736   int         ierr,n = a->mbs,i;
737 
738   PetscFunctionBegin;
739   *nn = n;
740   if (!ia) PetscFunctionReturn(0);
741   if (symmetric) {
742     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
743   } else if (oshift == 1) {
744     /* temporarily add 1 to i and j indices */
745     int nz = a->i[n];
746     for (i=0; i<nz; i++) a->j[i]++;
747     for (i=0; i<n+1; i++) a->i[i]++;
748     *ia = a->i; *ja = a->j;
749   } else {
750     *ia = a->i; *ja = a->j;
751   }
752 
753   PetscFunctionReturn(0);
754 }
755 
756 #undef __FUNCT__
757 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
758 static int MatRestoreRowIJ_SeqBAIJ(Mat A,int oshift,PetscTruth symmetric,int *nn,int *ia[],int *ja[],PetscTruth *done)
759 {
760   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
761   int         i,n = a->mbs,ierr;
762 
763   PetscFunctionBegin;
764   if (!ia) PetscFunctionReturn(0);
765   if (symmetric) {
766     ierr = PetscFree(*ia);CHKERRQ(ierr);
767     ierr = PetscFree(*ja);CHKERRQ(ierr);
768   } else if (oshift == 1) {
769     int nz = a->i[n]-1;
770     for (i=0; i<nz; i++) a->j[i]--;
771     for (i=0; i<n+1; i++) a->i[i]--;
772   }
773   PetscFunctionReturn(0);
774 }
775 
776 #undef __FUNCT__
777 #define __FUNCT__ "MatGetBlockSize_SeqBAIJ"
778 int MatGetBlockSize_SeqBAIJ(Mat mat,int *bs)
779 {
780   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
781 
782   PetscFunctionBegin;
783   *bs = baij->bs;
784   PetscFunctionReturn(0);
785 }
786 
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatDestroy_SeqBAIJ"
790 int MatDestroy_SeqBAIJ(Mat A)
791 {
792   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
793   int         ierr;
794 
795   PetscFunctionBegin;
796 #if defined(PETSC_USE_LOG)
797   PetscLogObjectState((PetscObject)A,"Rows=%d, Cols=%d, NZ=%d",A->m,A->n,a->nz);
798 #endif
799   ierr = PetscFree(a->a);CHKERRQ(ierr);
800   if (!a->singlemalloc) {
801     ierr = PetscFree(a->i);CHKERRQ(ierr);
802     ierr = PetscFree(a->j);CHKERRQ(ierr);
803   }
804   if (a->row) {
805     ierr = ISDestroy(a->row);CHKERRQ(ierr);
806   }
807   if (a->col) {
808     ierr = ISDestroy(a->col);CHKERRQ(ierr);
809   }
810   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
811   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
812   if (a->ilen) {ierr = PetscFree(a->ilen);CHKERRQ(ierr);}
813   if (a->imax) {ierr = PetscFree(a->imax);CHKERRQ(ierr);}
814   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
815   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
816   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
817   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
818 #if defined(PETSC_USE_MAT_SINGLE)
819   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
820 #endif
821   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
822 
823   ierr = PetscFree(a);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatSetOption_SeqBAIJ"
829 int MatSetOption_SeqBAIJ(Mat A,MatOption op)
830 {
831   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
832 
833   PetscFunctionBegin;
834   switch (op) {
835   case MAT_ROW_ORIENTED:
836     a->roworiented    = PETSC_TRUE;
837     break;
838   case MAT_COLUMN_ORIENTED:
839     a->roworiented    = PETSC_FALSE;
840     break;
841   case MAT_COLUMNS_SORTED:
842     a->sorted         = PETSC_TRUE;
843     break;
844   case MAT_COLUMNS_UNSORTED:
845     a->sorted         = PETSC_FALSE;
846     break;
847   case MAT_KEEP_ZEROED_ROWS:
848     a->keepzeroedrows = PETSC_TRUE;
849     break;
850   case MAT_NO_NEW_NONZERO_LOCATIONS:
851     a->nonew          = 1;
852     break;
853   case MAT_NEW_NONZERO_LOCATION_ERR:
854     a->nonew          = -1;
855     break;
856   case MAT_NEW_NONZERO_ALLOCATION_ERR:
857     a->nonew          = -2;
858     break;
859   case MAT_YES_NEW_NONZERO_LOCATIONS:
860     a->nonew          = 0;
861     break;
862   case MAT_ROWS_SORTED:
863   case MAT_ROWS_UNSORTED:
864   case MAT_YES_NEW_DIAGONALS:
865   case MAT_IGNORE_OFF_PROC_ENTRIES:
866   case MAT_USE_HASH_TABLE:
867     PetscLogInfo(A,"MatSetOption_SeqBAIJ:Option ignored\n");
868     break;
869   case MAT_NO_NEW_DIAGONALS:
870     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
871   case MAT_SYMMETRIC:
872   case MAT_STRUCTURALLY_SYMMETRIC:
873   case MAT_NOT_SYMMETRIC:
874   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
875   case MAT_HERMITIAN:
876   case MAT_NOT_HERMITIAN:
877   case MAT_SYMMETRY_ETERNAL:
878   case MAT_NOT_SYMMETRY_ETERNAL:
879     break;
880   default:
881     SETERRQ(PETSC_ERR_SUP,"unknown option");
882   }
883   PetscFunctionReturn(0);
884 }
885 
886 #undef __FUNCT__
887 #define __FUNCT__ "MatGetRow_SeqBAIJ"
888 int MatGetRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
889 {
890   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
891   int          itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2,ierr;
892   MatScalar    *aa,*aa_i;
893   PetscScalar  *v_i;
894 
895   PetscFunctionBegin;
896   bs  = a->bs;
897   ai  = a->i;
898   aj  = a->j;
899   aa  = a->a;
900   bs2 = a->bs2;
901 
902   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d out of range", row);
903 
904   bn  = row/bs;   /* Block number */
905   bp  = row % bs; /* Block Position */
906   M   = ai[bn+1] - ai[bn];
907   *nz = bs*M;
908 
909   if (v) {
910     *v = 0;
911     if (*nz) {
912       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
913       for (i=0; i<M; i++) { /* for each block in the block row */
914         v_i  = *v + i*bs;
915         aa_i = aa + bs2*(ai[bn] + i);
916         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
917       }
918     }
919   }
920 
921   if (idx) {
922     *idx = 0;
923     if (*nz) {
924       ierr = PetscMalloc((*nz)*sizeof(int),idx);CHKERRQ(ierr);
925       for (i=0; i<M; i++) { /* for each block in the block row */
926         idx_i = *idx + i*bs;
927         itmp  = bs*aj[ai[bn] + i];
928         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
929       }
930     }
931   }
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
937 int MatRestoreRow_SeqBAIJ(Mat A,int row,int *nz,int **idx,PetscScalar **v)
938 {
939   int ierr;
940 
941   PetscFunctionBegin;
942   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
943   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatTranspose_SeqBAIJ"
949 int MatTranspose_SeqBAIJ(Mat A,Mat *B)
950 {
951   Mat_SeqBAIJ *a=(Mat_SeqBAIJ *)A->data;
952   Mat         C;
953   int         i,j,k,ierr,*aj=a->j,*ai=a->i,bs=a->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
954   int         *rows,*cols,bs2=a->bs2;
955   PetscScalar *array;
956 
957   PetscFunctionBegin;
958   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
959   ierr = PetscMalloc((1+nbs)*sizeof(int),&col);CHKERRQ(ierr);
960   ierr = PetscMemzero(col,(1+nbs)*sizeof(int));CHKERRQ(ierr);
961 
962 #if defined(PETSC_USE_MAT_SINGLE)
963   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
964   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
965 #else
966   array = a->a;
967 #endif
968 
969   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
970   ierr = MatCreateSeqBAIJ(A->comm,bs,A->n,A->m,PETSC_NULL,col,&C);CHKERRQ(ierr);
971   ierr = PetscFree(col);CHKERRQ(ierr);
972   ierr = PetscMalloc(2*bs*sizeof(int),&rows);CHKERRQ(ierr);
973   cols = rows + bs;
974   for (i=0; i<mbs; i++) {
975     cols[0] = i*bs;
976     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
977     len = ai[i+1] - ai[i];
978     for (j=0; j<len; j++) {
979       rows[0] = (*aj++)*bs;
980       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
981       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
982       array += bs2;
983     }
984   }
985   ierr = PetscFree(rows);CHKERRQ(ierr);
986 #if defined(PETSC_USE_MAT_SINGLE)
987   ierr = PetscFree(array);
988 #endif
989 
990   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
991   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
992 
993   if (B) {
994     *B = C;
995   } else {
996     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
997   }
998   PetscFunctionReturn(0);
999 }
1000 
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1003 static int MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1004 {
1005   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1006   int         i,fd,*col_lens,ierr,bs = a->bs,count,*jj,j,k,l,bs2=a->bs2;
1007   PetscScalar *aa;
1008   FILE        *file;
1009 
1010   PetscFunctionBegin;
1011   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1012   ierr        = PetscMalloc((4+A->m)*sizeof(int),&col_lens);CHKERRQ(ierr);
1013   col_lens[0] = MAT_FILE_COOKIE;
1014 
1015   col_lens[1] = A->m;
1016   col_lens[2] = A->n;
1017   col_lens[3] = a->nz*bs2;
1018 
1019   /* store lengths of each row and write (including header) to file */
1020   count = 0;
1021   for (i=0; i<a->mbs; i++) {
1022     for (j=0; j<bs; j++) {
1023       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1024     }
1025   }
1026   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,1);CHKERRQ(ierr);
1027   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1028 
1029   /* store column indices (zero start index) */
1030   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(int),&jj);CHKERRQ(ierr);
1031   count = 0;
1032   for (i=0; i<a->mbs; i++) {
1033     for (j=0; j<bs; j++) {
1034       for (k=a->i[i]; k<a->i[i+1]; k++) {
1035         for (l=0; l<bs; l++) {
1036           jj[count++] = bs*a->j[k] + l;
1037         }
1038       }
1039     }
1040   }
1041   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,0);CHKERRQ(ierr);
1042   ierr = PetscFree(jj);CHKERRQ(ierr);
1043 
1044   /* store nonzero values */
1045   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1046   count = 0;
1047   for (i=0; i<a->mbs; i++) {
1048     for (j=0; j<bs; j++) {
1049       for (k=a->i[i]; k<a->i[i+1]; k++) {
1050         for (l=0; l<bs; l++) {
1051           aa[count++] = a->a[bs2*k + l*bs + j];
1052         }
1053       }
1054     }
1055   }
1056   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,0);CHKERRQ(ierr);
1057   ierr = PetscFree(aa);CHKERRQ(ierr);
1058 
1059   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1060   if (file) {
1061     fprintf(file,"-matload_block_size %d\n",a->bs);
1062   }
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1068 static int MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1069 {
1070   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1071   int               ierr,i,j,bs = a->bs,k,l,bs2=a->bs2;
1072   PetscViewerFormat format;
1073 
1074   PetscFunctionBegin;
1075   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1076   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1077     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
1078   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1079     Mat aij;
1080     ierr = MatConvert(A,MATSEQAIJ,&aij);CHKERRQ(ierr);
1081     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1082     ierr = MatDestroy(aij);CHKERRQ(ierr);
1083   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1084      PetscFunctionReturn(0);
1085   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1086     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1087     for (i=0; i<a->mbs; i++) {
1088       for (j=0; j<bs; j++) {
1089         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1090         for (k=a->i[i]; k<a->i[i+1]; k++) {
1091           for (l=0; l<bs; l++) {
1092 #if defined(PETSC_USE_COMPLEX)
1093             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1094               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %gi) ",bs*a->j[k]+l,
1095                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1096             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1097               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %gi) ",bs*a->j[k]+l,
1098                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1099             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1100               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1101             }
1102 #else
1103             if (a->a[bs2*k + l*bs + j] != 0.0) {
1104               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1105             }
1106 #endif
1107           }
1108         }
1109         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1110       }
1111     }
1112     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1113   } else {
1114     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1115     for (i=0; i<a->mbs; i++) {
1116       for (j=0; j<bs; j++) {
1117         ierr = PetscViewerASCIIPrintf(viewer,"row %d:",i*bs+j);CHKERRQ(ierr);
1118         for (k=a->i[i]; k<a->i[i+1]; k++) {
1119           for (l=0; l<bs; l++) {
1120 #if defined(PETSC_USE_COMPLEX)
1121             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1122               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g + %g i) ",bs*a->j[k]+l,
1123                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1124             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1125               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g - %g i) ",bs*a->j[k]+l,
1126                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1127             } else {
1128               ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1129             }
1130 #else
1131             ierr = PetscViewerASCIIPrintf(viewer," (%d, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1132 #endif
1133           }
1134         }
1135         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1136       }
1137     }
1138     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1139   }
1140   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1146 static int MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1147 {
1148   Mat          A = (Mat) Aa;
1149   Mat_SeqBAIJ  *a=(Mat_SeqBAIJ*)A->data;
1150   int          row,ierr,i,j,k,l,mbs=a->mbs,color,bs=a->bs,bs2=a->bs2;
1151   PetscReal    xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1152   MatScalar    *aa;
1153   PetscViewer  viewer;
1154 
1155   PetscFunctionBegin;
1156 
1157   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1158   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1159 
1160   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1161 
1162   /* loop over matrix elements drawing boxes */
1163   color = PETSC_DRAW_BLUE;
1164   for (i=0,row=0; i<mbs; i++,row+=bs) {
1165     for (j=a->i[i]; j<a->i[i+1]; j++) {
1166       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1167       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1168       aa = a->a + j*bs2;
1169       for (k=0; k<bs; k++) {
1170         for (l=0; l<bs; l++) {
1171           if (PetscRealPart(*aa++) >=  0.) continue;
1172           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1173         }
1174       }
1175     }
1176   }
1177   color = PETSC_DRAW_CYAN;
1178   for (i=0,row=0; i<mbs; i++,row+=bs) {
1179     for (j=a->i[i]; j<a->i[i+1]; j++) {
1180       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1181       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1182       aa = a->a + j*bs2;
1183       for (k=0; k<bs; k++) {
1184         for (l=0; l<bs; l++) {
1185           if (PetscRealPart(*aa++) != 0.) continue;
1186           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1187         }
1188       }
1189     }
1190   }
1191 
1192   color = PETSC_DRAW_RED;
1193   for (i=0,row=0; i<mbs; i++,row+=bs) {
1194     for (j=a->i[i]; j<a->i[i+1]; j++) {
1195       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1196       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1197       aa = a->a + j*bs2;
1198       for (k=0; k<bs; k++) {
1199         for (l=0; l<bs; l++) {
1200           if (PetscRealPart(*aa++) <= 0.) continue;
1201           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1202         }
1203       }
1204     }
1205   }
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 #undef __FUNCT__
1210 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1211 static int MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1212 {
1213   int          ierr;
1214   PetscReal    xl,yl,xr,yr,w,h;
1215   PetscDraw    draw;
1216   PetscTruth   isnull;
1217 
1218   PetscFunctionBegin;
1219 
1220   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1221   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1222 
1223   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1224   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1225   xr += w;    yr += h;  xl = -w;     yl = -h;
1226   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1227   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1228   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 #undef __FUNCT__
1233 #define __FUNCT__ "MatView_SeqBAIJ"
1234 int MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1235 {
1236   int        ierr;
1237   PetscTruth isascii,isbinary,isdraw;
1238 
1239   PetscFunctionBegin;
1240   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
1241   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1242   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1243   if (isascii){
1244     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1245   } else if (isbinary) {
1246     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1247   } else if (isdraw) {
1248     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1249   } else {
1250     Mat B;
1251     ierr = MatConvert(A,MATSEQAIJ,&B);CHKERRQ(ierr);
1252     ierr = MatView(B,viewer);CHKERRQ(ierr);
1253     ierr = MatDestroy(B);CHKERRQ(ierr);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1261 int MatGetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],PetscScalar v[])
1262 {
1263   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1264   int        *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1265   int        *ai = a->i,*ailen = a->ilen;
1266   int        brow,bcol,ridx,cidx,bs=a->bs,bs2=a->bs2;
1267   MatScalar  *ap,*aa = a->a,zero = 0.0;
1268 
1269   PetscFunctionBegin;
1270   for (k=0; k<m; k++) { /* loop over rows */
1271     row  = im[k]; brow = row/bs;
1272     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1273     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %d too large", row);
1274     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1275     nrow = ailen[brow];
1276     for (l=0; l<n; l++) { /* loop over columns */
1277       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1278       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %d too large", in[l]);
1279       col  = in[l] ;
1280       bcol = col/bs;
1281       cidx = col%bs;
1282       ridx = row%bs;
1283       high = nrow;
1284       low  = 0; /* assume unsorted */
1285       while (high-low > 5) {
1286         t = (low+high)/2;
1287         if (rp[t] > bcol) high = t;
1288         else             low  = t;
1289       }
1290       for (i=low; i<high; i++) {
1291         if (rp[i] > bcol) break;
1292         if (rp[i] == bcol) {
1293           *v++ = ap[bs2*i+bs*cidx+ridx];
1294           goto finished;
1295         }
1296       }
1297       *v++ = zero;
1298       finished:;
1299     }
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #if defined(PETSC_USE_MAT_SINGLE)
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1307 int MatSetValuesBlocked_SeqBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
1308 {
1309   Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)mat->data;
1310   int         ierr,i,N = m*n*b->bs2;
1311   MatScalar   *vsingle;
1312 
1313   PetscFunctionBegin;
1314   if (N > b->setvalueslen) {
1315     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1316     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1317     b->setvalueslen  = N;
1318   }
1319   vsingle = b->setvaluescopy;
1320   for (i=0; i<N; i++) {
1321     vsingle[i] = v[i];
1322   }
1323   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 #endif
1327 
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1331 int MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode is)
1332 {
1333   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1334   int               *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1335   int               *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1336   int               *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=a->bs,stepval,ierr;
1337   PetscTruth        roworiented=a->roworiented;
1338   const MatScalar   *value = v;
1339   MatScalar         *ap,*aa = a->a,*bap;
1340 
1341   PetscFunctionBegin;
1342   if (roworiented) {
1343     stepval = (n-1)*bs;
1344   } else {
1345     stepval = (m-1)*bs;
1346   }
1347   for (k=0; k<m; k++) { /* loop over added rows */
1348     row  = im[k];
1349     if (row < 0) continue;
1350 #if defined(PETSC_USE_BOPT_g)
1351     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,a->mbs-1);
1352 #endif
1353     rp   = aj + ai[row];
1354     ap   = aa + bs2*ai[row];
1355     rmax = imax[row];
1356     nrow = ailen[row];
1357     low  = 0;
1358     for (l=0; l<n; l++) { /* loop over added columns */
1359       if (in[l] < 0) continue;
1360 #if defined(PETSC_USE_BOPT_g)
1361       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],a->nbs-1);
1362 #endif
1363       col = in[l];
1364       if (roworiented) {
1365         value = v + k*(stepval+bs)*bs + l*bs;
1366       } else {
1367         value = v + l*(stepval+bs)*bs + k*bs;
1368       }
1369       if (!sorted) low = 0; high = nrow;
1370       while (high-low > 7) {
1371         t = (low+high)/2;
1372         if (rp[t] > col) high = t;
1373         else             low  = t;
1374       }
1375       for (i=low; i<high; i++) {
1376         if (rp[i] > col) break;
1377         if (rp[i] == col) {
1378           bap  = ap +  bs2*i;
1379           if (roworiented) {
1380             if (is == ADD_VALUES) {
1381               for (ii=0; ii<bs; ii++,value+=stepval) {
1382                 for (jj=ii; jj<bs2; jj+=bs) {
1383                   bap[jj] += *value++;
1384                 }
1385               }
1386             } else {
1387               for (ii=0; ii<bs; ii++,value+=stepval) {
1388                 for (jj=ii; jj<bs2; jj+=bs) {
1389                   bap[jj] = *value++;
1390                 }
1391               }
1392             }
1393           } else {
1394             if (is == ADD_VALUES) {
1395               for (ii=0; ii<bs; ii++,value+=stepval) {
1396                 for (jj=0; jj<bs; jj++) {
1397                   *bap++ += *value++;
1398                 }
1399               }
1400             } else {
1401               for (ii=0; ii<bs; ii++,value+=stepval) {
1402                 for (jj=0; jj<bs; jj++) {
1403                   *bap++  = *value++;
1404                 }
1405               }
1406             }
1407           }
1408           goto noinsert2;
1409         }
1410       }
1411       if (nonew == 1) goto noinsert2;
1412       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1413       if (nrow >= rmax) {
1414         /* there is no extra room in row, therefore enlarge */
1415         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1416         MatScalar *new_a;
1417 
1418         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1419 
1420         /* malloc new storage space */
1421         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1422 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1423         new_j   = (int*)(new_a + bs2*new_nz);
1424         new_i   = new_j + new_nz;
1425 
1426         /* copy over old data into new slots */
1427         for (ii=0; ii<row+1; ii++) {new_i[ii] = ai[ii];}
1428         for (ii=row+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1429         ierr = PetscMemcpy(new_j,aj,(ai[row]+nrow)*sizeof(int));CHKERRQ(ierr);
1430         len  = (new_nz - CHUNKSIZE - ai[row] - nrow);
1431         ierr = PetscMemcpy(new_j+ai[row]+nrow+CHUNKSIZE,aj+ai[row]+nrow,len*sizeof(int));CHKERRQ(ierr);
1432         ierr = PetscMemcpy(new_a,aa,(ai[row]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1433         ierr = PetscMemzero(new_a+bs2*(ai[row]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1434         ierr = PetscMemcpy(new_a+bs2*(ai[row]+nrow+CHUNKSIZE),aa+bs2*(ai[row]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1435         /* free up old matrix storage */
1436         ierr = PetscFree(a->a);CHKERRQ(ierr);
1437         if (!a->singlemalloc) {
1438           ierr = PetscFree(a->i);CHKERRQ(ierr);
1439           ierr = PetscFree(a->j);CHKERRQ(ierr);
1440         }
1441         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1442         a->singlemalloc = PETSC_TRUE;
1443 
1444         rp   = aj + ai[row]; ap = aa + bs2*ai[row];
1445         rmax = imax[row] = imax[row] + CHUNKSIZE;
1446         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1447         a->maxnz += bs2*CHUNKSIZE;
1448         a->reallocs++;
1449         a->nz++;
1450       }
1451       N = nrow++ - 1;
1452       /* shift up all the later entries in this row */
1453       for (ii=N; ii>=i; ii--) {
1454         rp[ii+1] = rp[ii];
1455         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1456       }
1457       if (N >= i) {
1458         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1459       }
1460       rp[i] = col;
1461       bap   = ap +  bs2*i;
1462       if (roworiented) {
1463         for (ii=0; ii<bs; ii++,value+=stepval) {
1464           for (jj=ii; jj<bs2; jj+=bs) {
1465             bap[jj] = *value++;
1466           }
1467         }
1468       } else {
1469         for (ii=0; ii<bs; ii++,value+=stepval) {
1470           for (jj=0; jj<bs; jj++) {
1471             *bap++  = *value++;
1472           }
1473         }
1474       }
1475       noinsert2:;
1476       low = i;
1477     }
1478     ailen[row] = nrow;
1479   }
1480   PetscFunctionReturn(0);
1481 }
1482 
1483 #undef __FUNCT__
1484 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1485 int MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1486 {
1487   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1488   int        fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1489   int        m = A->m,*ip,N,*ailen = a->ilen;
1490   int        mbs = a->mbs,bs2 = a->bs2,rmax = 0,ierr;
1491   MatScalar  *aa = a->a,*ap;
1492 
1493   PetscFunctionBegin;
1494   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1495 
1496   if (m) rmax = ailen[0];
1497   for (i=1; i<mbs; i++) {
1498     /* move each row back by the amount of empty slots (fshift) before it*/
1499     fshift += imax[i-1] - ailen[i-1];
1500     rmax   = PetscMax(rmax,ailen[i]);
1501     if (fshift) {
1502       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1503       N = ailen[i];
1504       for (j=0; j<N; j++) {
1505         ip[j-fshift] = ip[j];
1506         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1507       }
1508     }
1509     ai[i] = ai[i-1] + ailen[i-1];
1510   }
1511   if (mbs) {
1512     fshift += imax[mbs-1] - ailen[mbs-1];
1513     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1514   }
1515   /* reset ilen and imax for each row */
1516   for (i=0; i<mbs; i++) {
1517     ailen[i] = imax[i] = ai[i+1] - ai[i];
1518   }
1519   a->nz = ai[mbs];
1520 
1521   /* diagonals may have moved, so kill the diagonal pointers */
1522   a->idiagvalid = PETSC_FALSE;
1523   if (fshift && a->diag) {
1524     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1525     PetscLogObjectMemory(A,-(mbs+1)*sizeof(int));
1526     a->diag = 0;
1527   }
1528   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %d X %d, block size %d; storage space: %d unneeded, %d used\n",m,A->n,a->bs,fshift*bs2,a->nz*bs2);
1529   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %d\n",a->reallocs);
1530   PetscLogInfo(A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %d\n",rmax);
1531   a->reallocs          = 0;
1532   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1533 
1534   PetscFunctionReturn(0);
1535 }
1536 
1537 
1538 
1539 /*
1540    This function returns an array of flags which indicate the locations of contiguous
1541    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1542    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1543    Assume: sizes should be long enough to hold all the values.
1544 */
1545 #undef __FUNCT__
1546 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1547 static int MatZeroRows_SeqBAIJ_Check_Blocks(int idx[],int n,int bs,int sizes[], int *bs_max)
1548 {
1549   int        i,j,k,row;
1550   PetscTruth flg;
1551 
1552   PetscFunctionBegin;
1553   for (i=0,j=0; i<n; j++) {
1554     row = idx[i];
1555     if (row%bs!=0) { /* Not the begining of a block */
1556       sizes[j] = 1;
1557       i++;
1558     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1559       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1560       i++;
1561     } else { /* Begining of the block, so check if the complete block exists */
1562       flg = PETSC_TRUE;
1563       for (k=1; k<bs; k++) {
1564         if (row+k != idx[i+k]) { /* break in the block */
1565           flg = PETSC_FALSE;
1566           break;
1567         }
1568       }
1569       if (flg == PETSC_TRUE) { /* No break in the bs */
1570         sizes[j] = bs;
1571         i+= bs;
1572       } else {
1573         sizes[j] = 1;
1574         i++;
1575       }
1576     }
1577   }
1578   *bs_max = j;
1579   PetscFunctionReturn(0);
1580 }
1581 
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1584 int MatZeroRows_SeqBAIJ(Mat A,IS is,const PetscScalar *diag)
1585 {
1586   Mat_SeqBAIJ *baij=(Mat_SeqBAIJ*)A->data;
1587   int         ierr,i,j,k,count,is_n,*is_idx,*rows;
1588   int         bs=baij->bs,bs2=baij->bs2,*sizes,row,bs_max;
1589   PetscScalar zero = 0.0;
1590   MatScalar   *aa;
1591 
1592   PetscFunctionBegin;
1593   /* Make a copy of the IS and  sort it */
1594   ierr = ISGetLocalSize(is,&is_n);CHKERRQ(ierr);
1595   ierr = ISGetIndices(is,&is_idx);CHKERRQ(ierr);
1596 
1597   /* allocate memory for rows,sizes */
1598   ierr  = PetscMalloc((3*is_n+1)*sizeof(int),&rows);CHKERRQ(ierr);
1599   sizes = rows + is_n;
1600 
1601   /* copy IS values to rows, and sort them */
1602   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1603   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1604   if (baij->keepzeroedrows) {
1605     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1606     bs_max = is_n;
1607   } else {
1608     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1609   }
1610   ierr = ISRestoreIndices(is,&is_idx);CHKERRQ(ierr);
1611 
1612   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1613     row   = rows[j];
1614     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %d out of range",row);
1615     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1616     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1617     if (sizes[i] == bs && !baij->keepzeroedrows) {
1618       if (diag) {
1619         if (baij->ilen[row/bs] > 0) {
1620           baij->ilen[row/bs]       = 1;
1621           baij->j[baij->i[row/bs]] = row/bs;
1622           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1623         }
1624         /* Now insert all the diagonal values for this bs */
1625         for (k=0; k<bs; k++) {
1626           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,diag,INSERT_VALUES);CHKERRQ(ierr);
1627         }
1628       } else { /* (!diag) */
1629         baij->ilen[row/bs] = 0;
1630       } /* end (!diag) */
1631     } else { /* (sizes[i] != bs) */
1632 #if defined (PETSC_USE_DEBUG)
1633       if (sizes[i] != 1) SETERRQ(1,"Internal Error. Value should be 1");
1634 #endif
1635       for (k=0; k<count; k++) {
1636         aa[0] =  zero;
1637         aa    += bs;
1638       }
1639       if (diag) {
1640         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,diag,INSERT_VALUES);CHKERRQ(ierr);
1641       }
1642     }
1643   }
1644 
1645   ierr = PetscFree(rows);CHKERRQ(ierr);
1646   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1647   PetscFunctionReturn(0);
1648 }
1649 
1650 #undef __FUNCT__
1651 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1652 int MatSetValues_SeqBAIJ(Mat A,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode is)
1653 {
1654   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1655   int         *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,sorted=a->sorted;
1656   int         *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1657   int         *aj=a->j,nonew=a->nonew,bs=a->bs,brow,bcol;
1658   int         ridx,cidx,bs2=a->bs2,ierr;
1659   PetscTruth  roworiented=a->roworiented;
1660   MatScalar   *ap,value,*aa=a->a,*bap;
1661 
1662   PetscFunctionBegin;
1663   for (k=0; k<m; k++) { /* loop over added rows */
1664     row  = im[k]; brow = row/bs;
1665     if (row < 0) continue;
1666 #if defined(PETSC_USE_BOPT_g)
1667     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",row,A->m-1);
1668 #endif
1669     rp   = aj + ai[brow];
1670     ap   = aa + bs2*ai[brow];
1671     rmax = imax[brow];
1672     nrow = ailen[brow];
1673     low  = 0;
1674     for (l=0; l<n; l++) { /* loop over added columns */
1675       if (in[l] < 0) continue;
1676 #if defined(PETSC_USE_BOPT_g)
1677       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[l],A->n-1);
1678 #endif
1679       col = in[l]; bcol = col/bs;
1680       ridx = row % bs; cidx = col % bs;
1681       if (roworiented) {
1682         value = v[l + k*n];
1683       } else {
1684         value = v[k + l*m];
1685       }
1686       if (!sorted) low = 0; high = nrow;
1687       while (high-low > 7) {
1688         t = (low+high)/2;
1689         if (rp[t] > bcol) high = t;
1690         else              low  = t;
1691       }
1692       for (i=low; i<high; i++) {
1693         if (rp[i] > bcol) break;
1694         if (rp[i] == bcol) {
1695           bap  = ap +  bs2*i + bs*cidx + ridx;
1696           if (is == ADD_VALUES) *bap += value;
1697           else                  *bap  = value;
1698           goto noinsert1;
1699         }
1700       }
1701       if (nonew == 1) goto noinsert1;
1702       else if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1703       if (nrow >= rmax) {
1704         /* there is no extra room in row, therefore enlarge */
1705         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j;
1706         MatScalar *new_a;
1707 
1708         if (nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col);
1709 
1710         /* Malloc new storage space */
1711         len     = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int);
1712 	ierr    = PetscMalloc(len,&new_a);CHKERRQ(ierr);
1713         new_j   = (int*)(new_a + bs2*new_nz);
1714         new_i   = new_j + new_nz;
1715 
1716         /* copy over old data into new slots */
1717         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];}
1718         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;}
1719         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr);
1720         len  = (new_nz - CHUNKSIZE - ai[brow] - nrow);
1721         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr);
1722         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr);
1723         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr);
1724         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE),aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);
1725         /* free up old matrix storage */
1726         ierr = PetscFree(a->a);CHKERRQ(ierr);
1727         if (!a->singlemalloc) {
1728           ierr = PetscFree(a->i);CHKERRQ(ierr);
1729           ierr = PetscFree(a->j);CHKERRQ(ierr);
1730         }
1731         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;
1732         a->singlemalloc = PETSC_TRUE;
1733 
1734         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1735         rmax = imax[brow] = imax[brow] + CHUNKSIZE;
1736         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar)));
1737         a->maxnz += bs2*CHUNKSIZE;
1738         a->reallocs++;
1739         a->nz++;
1740       }
1741       N = nrow++ - 1;
1742       /* shift up all the later entries in this row */
1743       for (ii=N; ii>=i; ii--) {
1744         rp[ii+1] = rp[ii];
1745         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1746       }
1747       if (N>=i) {
1748         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1749       }
1750       rp[i]                      = bcol;
1751       ap[bs2*i + bs*cidx + ridx] = value;
1752       noinsert1:;
1753       low = i;
1754     }
1755     ailen[brow] = nrow;
1756   }
1757   PetscFunctionReturn(0);
1758 }
1759 
1760 
1761 #undef __FUNCT__
1762 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1763 int MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1764 {
1765   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)inA->data;
1766   Mat         outA;
1767   int         ierr;
1768   PetscTruth  row_identity,col_identity;
1769 
1770   PetscFunctionBegin;
1771   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1772   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1773   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1774   if (!row_identity || !col_identity) {
1775     SETERRQ(1,"Row and column permutations must be identity for in-place ILU");
1776   }
1777 
1778   outA          = inA;
1779   inA->factor   = FACTOR_LU;
1780 
1781   if (!a->diag) {
1782     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1783   }
1784 
1785   a->row        = row;
1786   a->col        = col;
1787   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1788   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1789 
1790   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1791   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1792   PetscLogObjectParent(inA,a->icol);
1793 
1794   /*
1795       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1796       for ILU(0) factorization with natural ordering
1797   */
1798   if (a->bs < 8) {
1799     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1800   } else {
1801     if (!a->solve_work) {
1802       ierr = PetscMalloc((inA->m+a->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1803       PetscLogObjectMemory(inA,(inA->m+a->bs)*sizeof(PetscScalar));
1804     }
1805   }
1806 
1807   ierr = MatLUFactorNumeric(inA,&outA);CHKERRQ(ierr);
1808 
1809   PetscFunctionReturn(0);
1810 }
1811 #undef __FUNCT__
1812 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1813 int MatPrintHelp_SeqBAIJ(Mat A)
1814 {
1815   static PetscTruth called = PETSC_FALSE;
1816   MPI_Comm          comm = A->comm;
1817   int               ierr;
1818 
1819   PetscFunctionBegin;
1820   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1821   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1822   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 EXTERN_C_BEGIN
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1829 int MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,int *indices)
1830 {
1831   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1832   int         i,nz,nbs;
1833 
1834   PetscFunctionBegin;
1835   nz  = baij->maxnz/baij->bs2;
1836   nbs = baij->nbs;
1837   for (i=0; i<nz; i++) {
1838     baij->j[i] = indices[i];
1839   }
1840   baij->nz = nz;
1841   for (i=0; i<nbs; i++) {
1842     baij->ilen[i] = baij->imax[i];
1843   }
1844 
1845   PetscFunctionReturn(0);
1846 }
1847 EXTERN_C_END
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1851 /*@
1852     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1853        in the matrix.
1854 
1855   Input Parameters:
1856 +  mat - the SeqBAIJ matrix
1857 -  indices - the column indices
1858 
1859   Level: advanced
1860 
1861   Notes:
1862     This can be called if you have precomputed the nonzero structure of the
1863   matrix and want to provide it to the matrix object to improve the performance
1864   of the MatSetValues() operation.
1865 
1866     You MUST have set the correct numbers of nonzeros per row in the call to
1867   MatCreateSeqBAIJ().
1868 
1869     MUST be called before any calls to MatSetValues();
1870 
1871 @*/
1872 int MatSeqBAIJSetColumnIndices(Mat mat,int *indices)
1873 {
1874   int ierr,(*f)(Mat,int *);
1875 
1876   PetscFunctionBegin;
1877   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1878   PetscValidPointer(indices,2);
1879   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1880   if (f) {
1881     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1882   } else {
1883     SETERRQ(1,"Wrong type of matrix to set column indices");
1884   }
1885   PetscFunctionReturn(0);
1886 }
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1890 int MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1891 {
1892   Mat_SeqBAIJ  *a = (Mat_SeqBAIJ*)A->data;
1893   int          ierr,i,j,n,row,bs,*ai,*aj,mbs;
1894   PetscReal    atmp;
1895   PetscScalar  *x,zero = 0.0;
1896   MatScalar    *aa;
1897   int          ncols,brow,krow,kcol;
1898 
1899   PetscFunctionBegin;
1900   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1901   bs   = a->bs;
1902   aa   = a->a;
1903   ai   = a->i;
1904   aj   = a->j;
1905   mbs = a->mbs;
1906 
1907   ierr = VecSet(&zero,v);CHKERRQ(ierr);
1908   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1909   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1910   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1911   for (i=0; i<mbs; i++) {
1912     ncols = ai[1] - ai[0]; ai++;
1913     brow  = bs*i;
1914     for (j=0; j<ncols; j++){
1915       /* bcol = bs*(*aj); */
1916       for (kcol=0; kcol<bs; kcol++){
1917         for (krow=0; krow<bs; krow++){
1918           atmp = PetscAbsScalar(*aa); aa++;
1919           row = brow + krow;    /* row index */
1920           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1921           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1922         }
1923       }
1924       aj++;
1925     }
1926   }
1927   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1933 int MatSetUpPreallocation_SeqBAIJ(Mat A)
1934 {
1935   int        ierr;
1936 
1937   PetscFunctionBegin;
1938   ierr =  MatSeqBAIJSetPreallocation(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1944 int MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1945 {
1946   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1947   PetscFunctionBegin;
1948   *array = a->a;
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1954 int MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1955 {
1956   PetscFunctionBegin;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscblaslapack.h"
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1963 int MatAXPY_SeqBAIJ(const PetscScalar *a,Mat X,Mat Y,MatStructure str)
1964 {
1965   Mat_SeqBAIJ  *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1966   int          ierr,one=1,i,bs=y->bs,j,bs2;
1967 
1968   PetscFunctionBegin;
1969   if (str == SAME_NONZERO_PATTERN) {
1970     BLaxpy_(&x->nz,(PetscScalar*)a,x->a,&one,y->a,&one);
1971   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1972     if (y->xtoy && y->XtoY != X) {
1973       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1974       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1975     }
1976     if (!y->xtoy) { /* get xtoy */
1977       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1978       y->XtoY = X;
1979     }
1980     bs2 = bs*bs;
1981     for (i=0; i<x->nz; i++) {
1982       j = 0;
1983       while (j < bs2){
1984         y->a[bs2*y->xtoy[i]+j] += (*a)*(x->a[bs2*i+j]);
1985         j++;
1986       }
1987     }
1988     PetscLogInfo(0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %d/%d = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));
1989   } else {
1990     ierr = MatAXPY_Basic(a,X,Y,str);CHKERRQ(ierr);
1991   }
1992   PetscFunctionReturn(0);
1993 }
1994 
1995 /* -------------------------------------------------------------------*/
1996 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
1997        MatGetRow_SeqBAIJ,
1998        MatRestoreRow_SeqBAIJ,
1999        MatMult_SeqBAIJ_N,
2000 /* 4*/ MatMultAdd_SeqBAIJ_N,
2001        MatMultTranspose_SeqBAIJ,
2002        MatMultTransposeAdd_SeqBAIJ,
2003        MatSolve_SeqBAIJ_N,
2004        0,
2005        0,
2006 /*10*/ 0,
2007        MatLUFactor_SeqBAIJ,
2008        0,
2009        0,
2010        MatTranspose_SeqBAIJ,
2011 /*15*/ MatGetInfo_SeqBAIJ,
2012        MatEqual_SeqBAIJ,
2013        MatGetDiagonal_SeqBAIJ,
2014        MatDiagonalScale_SeqBAIJ,
2015        MatNorm_SeqBAIJ,
2016 /*20*/ 0,
2017        MatAssemblyEnd_SeqBAIJ,
2018        0,
2019        MatSetOption_SeqBAIJ,
2020        MatZeroEntries_SeqBAIJ,
2021 /*25*/ MatZeroRows_SeqBAIJ,
2022        MatLUFactorSymbolic_SeqBAIJ,
2023        MatLUFactorNumeric_SeqBAIJ_N,
2024        0,
2025        0,
2026 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2027        MatILUFactorSymbolic_SeqBAIJ,
2028        0,
2029        MatGetArray_SeqBAIJ,
2030        MatRestoreArray_SeqBAIJ,
2031 /*35*/ MatDuplicate_SeqBAIJ,
2032        0,
2033        0,
2034        MatILUFactor_SeqBAIJ,
2035        0,
2036 /*40*/ MatAXPY_SeqBAIJ,
2037        MatGetSubMatrices_SeqBAIJ,
2038        MatIncreaseOverlap_SeqBAIJ,
2039        MatGetValues_SeqBAIJ,
2040        0,
2041 /*45*/ MatPrintHelp_SeqBAIJ,
2042        MatScale_SeqBAIJ,
2043        0,
2044        0,
2045        0,
2046 /*50*/ MatGetBlockSize_SeqBAIJ,
2047        MatGetRowIJ_SeqBAIJ,
2048        MatRestoreRowIJ_SeqBAIJ,
2049        0,
2050        0,
2051 /*55*/ 0,
2052        0,
2053        0,
2054        0,
2055        MatSetValuesBlocked_SeqBAIJ,
2056 /*60*/ MatGetSubMatrix_SeqBAIJ,
2057        MatDestroy_SeqBAIJ,
2058        MatView_SeqBAIJ,
2059        MatGetPetscMaps_Petsc,
2060        0,
2061 /*65*/ 0,
2062        0,
2063        0,
2064        0,
2065        0,
2066 /*70*/ MatGetRowMax_SeqBAIJ,
2067        MatConvert_Basic,
2068        0,
2069        0,
2070        0,
2071 /*75*/ 0,
2072        0,
2073        0,
2074        0,
2075        0,
2076 /*80*/ 0,
2077        0,
2078        0,
2079        0,
2080 /*85*/ MatLoad_SeqBAIJ,
2081        0,
2082        0,
2083        0,
2084        0
2085 };
2086 
2087 EXTERN_C_BEGIN
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2090 int MatStoreValues_SeqBAIJ(Mat mat)
2091 {
2092   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2093   int         nz = aij->i[mat->m]*aij->bs*aij->bs2;
2094   int         ierr;
2095 
2096   PetscFunctionBegin;
2097   if (aij->nonew != 1) {
2098     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2099   }
2100 
2101   /* allocate space for values if not already there */
2102   if (!aij->saved_values) {
2103     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2104   }
2105 
2106   /* copy values over */
2107   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2108   PetscFunctionReturn(0);
2109 }
2110 EXTERN_C_END
2111 
2112 EXTERN_C_BEGIN
2113 #undef __FUNCT__
2114 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2115 int MatRetrieveValues_SeqBAIJ(Mat mat)
2116 {
2117   Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)mat->data;
2118   int         nz = aij->i[mat->m]*aij->bs*aij->bs2,ierr;
2119 
2120   PetscFunctionBegin;
2121   if (aij->nonew != 1) {
2122     SETERRQ(1,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2123   }
2124   if (!aij->saved_values) {
2125     SETERRQ(1,"Must call MatStoreValues(A);first");
2126   }
2127 
2128   /* copy values over */
2129   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2130   PetscFunctionReturn(0);
2131 }
2132 EXTERN_C_END
2133 
2134 EXTERN_C_BEGIN
2135 extern int MatConvert_SeqBAIJ_SeqAIJ(Mat,const MatType,Mat*);
2136 extern int MatConvert_SeqBAIJ_SeqSBAIJ(Mat,const MatType,Mat*);
2137 EXTERN_C_END
2138 
2139 EXTERN_C_BEGIN
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2142 int MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,int bs,int nz,int *nnz)
2143 {
2144   Mat_SeqBAIJ *b;
2145   int         i,len,ierr,mbs,nbs,bs2,newbs = bs;
2146   PetscTruth  flg;
2147 
2148   PetscFunctionBegin;
2149 
2150   B->preallocated = PETSC_TRUE;
2151   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2152   if (nnz && newbs != bs) {
2153     SETERRQ(1,"Cannot change blocksize from command line if setting nnz");
2154   }
2155   bs   = newbs;
2156 
2157   mbs  = B->m/bs;
2158   nbs  = B->n/bs;
2159   bs2  = bs*bs;
2160 
2161   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2162     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %d, cols %d must be divisible by blocksize %d",B->m,B->n,bs);
2163   }
2164 
2165   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2166   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %d",nz);
2167   if (nnz) {
2168     for (i=0; i<mbs; i++) {
2169       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %d value %d",i,nnz[i]);
2170       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %d value %d rowlength %d",i,nnz[i],nbs);
2171     }
2172   }
2173 
2174   b       = (Mat_SeqBAIJ*)B->data;
2175   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2176   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2177   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2178   if (!flg) {
2179     switch (bs) {
2180     case 1:
2181       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2182       B->ops->mult            = MatMult_SeqBAIJ_1;
2183       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2184       break;
2185     case 2:
2186       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2187       B->ops->mult            = MatMult_SeqBAIJ_2;
2188       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2189       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2190       break;
2191     case 3:
2192       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2193       B->ops->mult            = MatMult_SeqBAIJ_3;
2194       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2195       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2196       break;
2197     case 4:
2198       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2199       B->ops->mult            = MatMult_SeqBAIJ_4;
2200       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2201       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2202       break;
2203     case 5:
2204       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2205       B->ops->mult            = MatMult_SeqBAIJ_5;
2206       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2207       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2208       break;
2209     case 6:
2210       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2211       B->ops->mult            = MatMult_SeqBAIJ_6;
2212       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2213       break;
2214     case 7:
2215       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2216       B->ops->mult            = MatMult_SeqBAIJ_7;
2217       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2218       break;
2219     default:
2220       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2221       B->ops->mult            = MatMult_SeqBAIJ_N;
2222       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2223       break;
2224     }
2225   }
2226   b->bs      = bs;
2227   b->mbs     = mbs;
2228   b->nbs     = nbs;
2229   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->imax);CHKERRQ(ierr);
2230   if (!nnz) {
2231     if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2232     else if (nz <= 0)        nz = 1;
2233     for (i=0; i<mbs; i++) b->imax[i] = nz;
2234     nz = nz*mbs;
2235   } else {
2236     nz = 0;
2237     for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2238   }
2239 
2240   /* allocate the matrix space */
2241   len   = nz*sizeof(int) + nz*bs2*sizeof(MatScalar) + (B->m+1)*sizeof(int);
2242   ierr  = PetscMalloc(len,&b->a);CHKERRQ(ierr);
2243   ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2244   b->j  = (int*)(b->a + nz*bs2);
2245   ierr = PetscMemzero(b->j,nz*sizeof(int));CHKERRQ(ierr);
2246   b->i  = b->j + nz;
2247   b->singlemalloc = PETSC_TRUE;
2248 
2249   b->i[0] = 0;
2250   for (i=1; i<mbs+1; i++) {
2251     b->i[i] = b->i[i-1] + b->imax[i-1];
2252   }
2253 
2254   /* b->ilen will count nonzeros in each block row so far. */
2255   ierr = PetscMalloc((mbs+1)*sizeof(int),&b->ilen);CHKERRQ(ierr);
2256   PetscLogObjectMemory(B,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2257   for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2258 
2259   b->bs               = bs;
2260   b->bs2              = bs2;
2261   b->mbs              = mbs;
2262   b->nz               = 0;
2263   b->maxnz            = nz*bs2;
2264   B->info.nz_unneeded = (PetscReal)b->maxnz;
2265   PetscFunctionReturn(0);
2266 }
2267 EXTERN_C_END
2268 
2269 /*MC
2270    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2271    block sparse compressed row format.
2272 
2273    Options Database Keys:
2274 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2275 
2276   Level: beginner
2277 
2278 .seealso: MatCreateSeqBAIJ
2279 M*/
2280 
2281 EXTERN_C_BEGIN
2282 #undef __FUNCT__
2283 #define __FUNCT__ "MatCreate_SeqBAIJ"
2284 int MatCreate_SeqBAIJ(Mat B)
2285 {
2286   int         ierr,size;
2287   Mat_SeqBAIJ *b;
2288 
2289   PetscFunctionBegin;
2290   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2291   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2292 
2293   B->m = B->M = PetscMax(B->m,B->M);
2294   B->n = B->N = PetscMax(B->n,B->N);
2295   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2296   B->data = (void*)b;
2297   ierr    = PetscMemzero(b,sizeof(Mat_SeqBAIJ));CHKERRQ(ierr);
2298   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2299   B->factor           = 0;
2300   B->lupivotthreshold = 1.0;
2301   B->mapping          = 0;
2302   b->row              = 0;
2303   b->col              = 0;
2304   b->icol             = 0;
2305   b->reallocs         = 0;
2306   b->saved_values     = 0;
2307 #if defined(PETSC_USE_MAT_SINGLE)
2308   b->setvalueslen     = 0;
2309   b->setvaluescopy    = PETSC_NULL;
2310 #endif
2311 
2312   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2313   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2314 
2315   b->sorted           = PETSC_FALSE;
2316   b->roworiented      = PETSC_TRUE;
2317   b->nonew            = 0;
2318   b->diag             = 0;
2319   b->solve_work       = 0;
2320   b->mult_work        = 0;
2321   B->spptr            = 0;
2322   B->info.nz_unneeded = (PetscReal)b->maxnz;
2323   b->keepzeroedrows   = PETSC_FALSE;
2324   b->xtoy              = 0;
2325   b->XtoY              = 0;
2326 
2327   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2328                                      "MatStoreValues_SeqBAIJ",
2329                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2330   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2331                                      "MatRetrieveValues_SeqBAIJ",
2332                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2334                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2335                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2337                                      "MatConvert_SeqBAIJ_SeqAIJ",
2338                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2339 ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2340                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2341                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2343                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2344                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2345   PetscFunctionReturn(0);
2346 }
2347 EXTERN_C_END
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2351 int MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2352 {
2353   Mat         C;
2354   Mat_SeqBAIJ *c,*a = (Mat_SeqBAIJ*)A->data;
2355   int         i,len,mbs = a->mbs,nz = a->nz,bs2 =a->bs2,ierr;
2356 
2357   PetscFunctionBegin;
2358   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2359 
2360   *B = 0;
2361   ierr = MatCreate(A->comm,A->m,A->n,A->m,A->n,&C);CHKERRQ(ierr);
2362   ierr = MatSetType(C,MATSEQBAIJ);CHKERRQ(ierr);
2363   c    = (Mat_SeqBAIJ*)C->data;
2364 
2365   C->M   = A->M;
2366   C->N   = A->N;
2367   c->bs  = a->bs;
2368   c->bs2 = a->bs2;
2369   c->mbs = a->mbs;
2370   c->nbs = a->nbs;
2371   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2372 
2373   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->imax);CHKERRQ(ierr);
2374   ierr = PetscMalloc((mbs+1)*sizeof(int),&c->ilen);CHKERRQ(ierr);
2375   for (i=0; i<mbs; i++) {
2376     c->imax[i] = a->imax[i];
2377     c->ilen[i] = a->ilen[i];
2378   }
2379 
2380   /* allocate the matrix space */
2381   c->singlemalloc = PETSC_TRUE;
2382   len  = (mbs+1)*sizeof(int) + nz*(bs2*sizeof(MatScalar) + sizeof(int));
2383   ierr = PetscMalloc(len,&c->a);CHKERRQ(ierr);
2384   c->j = (int*)(c->a + nz*bs2);
2385   c->i = c->j + nz;
2386   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(int));CHKERRQ(ierr);
2387   if (mbs > 0) {
2388     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(int));CHKERRQ(ierr);
2389     if (cpvalues == MAT_COPY_VALUES) {
2390       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2391     } else {
2392       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2393     }
2394   }
2395 
2396   PetscLogObjectMemory(C,len+2*(mbs+1)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_SeqBAIJ));
2397   c->sorted      = a->sorted;
2398   c->roworiented = a->roworiented;
2399   c->nonew       = a->nonew;
2400 
2401   if (a->diag) {
2402     ierr = PetscMalloc((mbs+1)*sizeof(int),&c->diag);CHKERRQ(ierr);
2403     PetscLogObjectMemory(C,(mbs+1)*sizeof(int));
2404     for (i=0; i<mbs; i++) {
2405       c->diag[i] = a->diag[i];
2406     }
2407   } else c->diag        = 0;
2408   c->nz                 = a->nz;
2409   c->maxnz              = a->maxnz;
2410   c->solve_work         = 0;
2411   C->spptr              = 0;     /* Dangerous -I'm throwing away a->spptr */
2412   c->mult_work          = 0;
2413   C->preallocated       = PETSC_TRUE;
2414   C->assembled          = PETSC_TRUE;
2415   *B = C;
2416   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatLoad_SeqBAIJ"
2422 int MatLoad_SeqBAIJ(PetscViewer viewer,const MatType type,Mat *A)
2423 {
2424   Mat_SeqBAIJ  *a;
2425   Mat          B;
2426   int          i,nz,ierr,fd,header[4],size,*rowlengths=0,M,N,bs=1;
2427   int          *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2428   int          kmax,jcount,block,idx,point,nzcountb,extra_rows;
2429   int          *masked,nmask,tmp,bs2,ishift;
2430   PetscScalar  *aa;
2431   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2432 
2433   PetscFunctionBegin;
2434   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2435   bs2  = bs*bs;
2436 
2437   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2438   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2439   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2440   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2441   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2442   M = header[1]; N = header[2]; nz = header[3];
2443 
2444   if (header[3] < 0) {
2445     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2446   }
2447 
2448   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2449 
2450   /*
2451      This code adds extra rows to make sure the number of rows is
2452     divisible by the blocksize
2453   */
2454   mbs        = M/bs;
2455   extra_rows = bs - M + bs*(mbs);
2456   if (extra_rows == bs) extra_rows = 0;
2457   else                  mbs++;
2458   if (extra_rows) {
2459     PetscLogInfo(0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n");
2460   }
2461 
2462   /* read in row lengths */
2463   ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2464   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2465   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2466 
2467   /* read in column indices */
2468   ierr = PetscMalloc((nz+extra_rows)*sizeof(int),&jj);CHKERRQ(ierr);
2469   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2470   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2471 
2472   /* loop over row lengths determining block row lengths */
2473   ierr     = PetscMalloc(mbs*sizeof(int),&browlengths);CHKERRQ(ierr);
2474   ierr     = PetscMemzero(browlengths,mbs*sizeof(int));CHKERRQ(ierr);
2475   ierr     = PetscMalloc(2*mbs*sizeof(int),&mask);CHKERRQ(ierr);
2476   ierr     = PetscMemzero(mask,mbs*sizeof(int));CHKERRQ(ierr);
2477   masked   = mask + mbs;
2478   rowcount = 0; nzcount = 0;
2479   for (i=0; i<mbs; i++) {
2480     nmask = 0;
2481     for (j=0; j<bs; j++) {
2482       kmax = rowlengths[rowcount];
2483       for (k=0; k<kmax; k++) {
2484         tmp = jj[nzcount++]/bs;
2485         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2486       }
2487       rowcount++;
2488     }
2489     browlengths[i] += nmask;
2490     /* zero out the mask elements we set */
2491     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2492   }
2493 
2494   /* create our matrix */
2495   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows,&B);
2496   ierr = MatSetType(B,type);CHKERRQ(ierr);
2497   ierr = MatSeqBAIJSetPreallocation(B,bs,0,browlengths);CHKERRQ(ierr);
2498   a = (Mat_SeqBAIJ*)B->data;
2499 
2500   /* set matrix "i" values */
2501   a->i[0] = 0;
2502   for (i=1; i<= mbs; i++) {
2503     a->i[i]      = a->i[i-1] + browlengths[i-1];
2504     a->ilen[i-1] = browlengths[i-1];
2505   }
2506   a->nz         = 0;
2507   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2508 
2509   /* read in nonzero values */
2510   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2511   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2512   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2513 
2514   /* set "a" and "j" values into matrix */
2515   nzcount = 0; jcount = 0;
2516   for (i=0; i<mbs; i++) {
2517     nzcountb = nzcount;
2518     nmask    = 0;
2519     for (j=0; j<bs; j++) {
2520       kmax = rowlengths[i*bs+j];
2521       for (k=0; k<kmax; k++) {
2522         tmp = jj[nzcount++]/bs;
2523 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2524       }
2525     }
2526     /* sort the masked values */
2527     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2528 
2529     /* set "j" values into matrix */
2530     maskcount = 1;
2531     for (j=0; j<nmask; j++) {
2532       a->j[jcount++]  = masked[j];
2533       mask[masked[j]] = maskcount++;
2534     }
2535     /* set "a" values into matrix */
2536     ishift = bs2*a->i[i];
2537     for (j=0; j<bs; j++) {
2538       kmax = rowlengths[i*bs+j];
2539       for (k=0; k<kmax; k++) {
2540         tmp       = jj[nzcountb]/bs ;
2541         block     = mask[tmp] - 1;
2542         point     = jj[nzcountb] - bs*tmp;
2543         idx       = ishift + bs2*block + j + bs*point;
2544         a->a[idx] = (MatScalar)aa[nzcountb++];
2545       }
2546     }
2547     /* zero out the mask elements we set */
2548     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2549   }
2550   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2551 
2552   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2553   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2554   ierr = PetscFree(aa);CHKERRQ(ierr);
2555   ierr = PetscFree(jj);CHKERRQ(ierr);
2556   ierr = PetscFree(mask);CHKERRQ(ierr);
2557 
2558   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2559   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2560   ierr = MatView_Private(B);CHKERRQ(ierr);
2561 
2562   *A = B;
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatCreateSeqBAIJ"
2568 /*@C
2569    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2570    compressed row) format.  For good matrix assembly performance the
2571    user should preallocate the matrix storage by setting the parameter nz
2572    (or the array nnz).  By setting these parameters accurately, performance
2573    during matrix assembly can be increased by more than a factor of 50.
2574 
2575    Collective on MPI_Comm
2576 
2577    Input Parameters:
2578 +  comm - MPI communicator, set to PETSC_COMM_SELF
2579 .  bs - size of block
2580 .  m - number of rows
2581 .  n - number of columns
2582 .  nz - number of nonzero blocks  per block row (same for all rows)
2583 -  nnz - array containing the number of nonzero blocks in the various block rows
2584          (possibly different for each block row) or PETSC_NULL
2585 
2586    Output Parameter:
2587 .  A - the matrix
2588 
2589    Options Database Keys:
2590 .   -mat_no_unroll - uses code that does not unroll the loops in the
2591                      block calculations (much slower)
2592 .    -mat_block_size - size of the blocks to use
2593 
2594    Level: intermediate
2595 
2596    Notes:
2597    A nonzero block is any block that as 1 or more nonzeros in it
2598 
2599    The block AIJ format is fully compatible with standard Fortran 77
2600    storage.  That is, the stored row and column indices can begin at
2601    either one (as in Fortran) or zero.  See the users' manual for details.
2602 
2603    Specify the preallocated storage with either nz or nnz (not both).
2604    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2605    allocation.  For additional details, see the users manual chapter on
2606    matrices.
2607 
2608 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2609 @*/
2610 int MatCreateSeqBAIJ(MPI_Comm comm,int bs,int m,int n,int nz,const int nnz[],Mat *A)
2611 {
2612   int ierr;
2613 
2614   PetscFunctionBegin;
2615   ierr = MatCreate(comm,m,n,m,n,A);CHKERRQ(ierr);
2616   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2617   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,nnz);CHKERRQ(ierr);
2618   PetscFunctionReturn(0);
2619 }
2620 
2621 #undef __FUNCT__
2622 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2623 /*@C
2624    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2625    per row in the matrix. For good matrix assembly performance the
2626    user should preallocate the matrix storage by setting the parameter nz
2627    (or the array nnz).  By setting these parameters accurately, performance
2628    during matrix assembly can be increased by more than a factor of 50.
2629 
2630    Collective on MPI_Comm
2631 
2632    Input Parameters:
2633 +  A - the matrix
2634 .  bs - size of block
2635 .  nz - number of block nonzeros per block row (same for all rows)
2636 -  nnz - array containing the number of block nonzeros in the various block rows
2637          (possibly different for each block row) or PETSC_NULL
2638 
2639    Options Database Keys:
2640 .   -mat_no_unroll - uses code that does not unroll the loops in the
2641                      block calculations (much slower)
2642 .    -mat_block_size - size of the blocks to use
2643 
2644    Level: intermediate
2645 
2646    Notes:
2647    The block AIJ format is fully compatible with standard Fortran 77
2648    storage.  That is, the stored row and column indices can begin at
2649    either one (as in Fortran) or zero.  See the users' manual for details.
2650 
2651    Specify the preallocated storage with either nz or nnz (not both).
2652    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2653    allocation.  For additional details, see the users manual chapter on
2654    matrices.
2655 
2656 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2657 @*/
2658 int MatSeqBAIJSetPreallocation(Mat B,int bs,int nz,const int nnz[])
2659 {
2660   int ierr,(*f)(Mat,int,int,const int[]);
2661 
2662   PetscFunctionBegin;
2663   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2664   if (f) {
2665     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2666   }
2667   PetscFunctionReturn(0);
2668 }
2669 
2670