xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 09f3b4e5628a00a1eaf17d80982cfbcc515cc9c1)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   its = its*lits;
129   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
130   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
131   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
132   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
133   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
134 
135   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
136 
137   diag  = a->diag;
138   idiag = a->idiag;
139   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
140   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
141 
142   if (flag & SOR_ZERO_INITIAL_GUESS) {
143     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
144       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
145       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
146       i2     = 2;
147       idiag += 4;
148       for (i=1; i<m; i++) {
149 	v     = aa + 4*ai[i];
150 	vi    = aj + ai[i];
151 	nz    = diag[i] - ai[i];
152 	s1    = b[i2]; s2 = b[i2+1];
153 	while (nz--) {
154 	  idx  = 2*(*vi++);
155 	  x1   = x[idx]; x2 = x[1+idx];
156 	  s1  -= v[0]*x1 + v[2]*x2;
157 	  s2  -= v[1]*x1 + v[3]*x2;
158 	  v   += 4;
159 	}
160 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
161 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
162         idiag   += 4;
163         i2      += 2;
164       }
165       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
166       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
167     }
168     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
169         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
170       i2    = 0;
171       mdiag = a->idiag+4*a->mbs;
172       for (i=0; i<m; i++) {
173         x1      = x[i2]; x2 = x[i2+1];
174         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
175         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
176         mdiag  += 4;
177         i2     += 2;
178       }
179       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
180     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
181       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
182     }
183     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
184       idiag   = a->idiag+4*a->mbs - 4;
185       i2      = 2*m - 2;
186       x1      = x[i2]; x2 = x[i2+1];
187       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
188       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
189       idiag -= 4;
190       i2    -= 2;
191       for (i=m-2; i>=0; i--) {
192 	v     = aa + 4*(diag[i]+1);
193 	vi    = aj + diag[i] + 1;
194 	nz    = ai[i+1] - diag[i] - 1;
195 	s1    = x[i2]; s2 = x[i2+1];
196 	while (nz--) {
197 	  idx  = 2*(*vi++);
198 	  x1   = x[idx]; x2 = x[1+idx];
199 	  s1  -= v[0]*x1 + v[2]*x2;
200 	  s2  -= v[1]*x1 + v[3]*x2;
201 	  v   += 4;
202 	}
203 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
204 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
205         idiag   -= 4;
206         i2      -= 2;
207       }
208       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
220 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
224   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
225   PetscErrorCode     ierr;
226   PetscInt           m = a->mbs,i,i2,nz,idx;
227   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
228 
229   PetscFunctionBegin;
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
235   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
246       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
248       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
249       i2     = 3;
250       idiag += 9;
251       for (i=1; i<m; i++) {
252 	v     = aa + 9*ai[i];
253 	vi    = aj + ai[i];
254 	nz    = diag[i] - ai[i];
255 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
256 	while (nz--) {
257 	  idx  = 3*(*vi++);
258 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
259 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
260 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
261 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
262 	  v   += 9;
263 	}
264 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
265 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
266 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
267         idiag   += 9;
268         i2      += 3;
269       }
270       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
271       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
272     }
273     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
274         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
275       i2    = 0;
276       mdiag = a->idiag+9*a->mbs;
277       for (i=0; i<m; i++) {
278         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
279         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
280         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
281         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
282         mdiag  += 9;
283         i2     += 3;
284       }
285       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
286     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
287       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
288     }
289     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
290       idiag   = a->idiag+9*a->mbs - 9;
291       i2      = 3*m - 3;
292       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
293       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
294       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
295       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
296       idiag -= 9;
297       i2    -= 3;
298       for (i=m-2; i>=0; i--) {
299 	v     = aa + 9*(diag[i]+1);
300 	vi    = aj + diag[i] + 1;
301 	nz    = ai[i+1] - diag[i] - 1;
302 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
303 	while (nz--) {
304 	  idx  = 3*(*vi++);
305 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
306 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
307 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
308 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
309 	  v   += 9;
310 	}
311 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
312 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
313 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
314         idiag   -= 9;
315         i2      -= 3;
316       }
317       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
329 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
333   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
334   PetscErrorCode     ierr;
335   PetscInt           m = a->mbs,i,i2,nz,idx;
336   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
337 
338   PetscFunctionBegin;
339   its = its*lits;
340   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
341   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
342   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
343   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
344   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
345 
346   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
347 
348   diag  = a->diag;
349   idiag = a->idiag;
350   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
351   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
352 
353   if (flag & SOR_ZERO_INITIAL_GUESS) {
354     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
355       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
356       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
357       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
358       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
359       i2     = 4;
360       idiag += 16;
361       for (i=1; i<m; i++) {
362 	v     = aa + 16*ai[i];
363 	vi    = aj + ai[i];
364 	nz    = diag[i] - ai[i];
365 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
366 	while (nz--) {
367 	  idx  = 4*(*vi++);
368 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
369 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
370 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
371 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
372 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
373 	  v   += 16;
374 	}
375 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
376 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
377 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
378 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
379         idiag   += 16;
380         i2      += 4;
381       }
382       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
383       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
384     }
385     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
386         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
387       i2    = 0;
388       mdiag = a->idiag+16*a->mbs;
389       for (i=0; i<m; i++) {
390         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
391         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
392         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
393         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
394         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
395         mdiag  += 16;
396         i2     += 4;
397       }
398       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
399     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
400       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
401     }
402     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
403       idiag   = a->idiag+16*a->mbs - 16;
404       i2      = 4*m - 4;
405       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
406       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
407       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
408       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
409       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
410       idiag -= 16;
411       i2    -= 4;
412       for (i=m-2; i>=0; i--) {
413 	v     = aa + 16*(diag[i]+1);
414 	vi    = aj + diag[i] + 1;
415 	nz    = ai[i+1] - diag[i] - 1;
416 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
417 	while (nz--) {
418 	  idx  = 4*(*vi++);
419 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
420 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
421 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
422 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
423 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
424 	  v   += 16;
425 	}
426 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
427 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
428 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
429 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
430         idiag   -= 16;
431         i2      -= 4;
432       }
433       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
434     }
435   } else {
436     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
437   }
438   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
439   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
440   PetscFunctionReturn(0);
441 }
442 
443 #undef __FUNCT__
444 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
445 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
446 {
447   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
448   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
449   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
450   PetscErrorCode     ierr;
451   PetscInt           m = a->mbs,i,i2,nz,idx;
452   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
453 
454   PetscFunctionBegin;
455   its = its*lits;
456   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
457   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
458   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
459   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
460   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
461 
462   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
463 
464   diag  = a->diag;
465   idiag = a->idiag;
466   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
467   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
468 
469   if (flag & SOR_ZERO_INITIAL_GUESS) {
470     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
471       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
472       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
473       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
474       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
475       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
476       i2     = 5;
477       idiag += 25;
478       for (i=1; i<m; i++) {
479 	v     = aa + 25*ai[i];
480 	vi    = aj + ai[i];
481 	nz    = diag[i] - ai[i];
482 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
483 	while (nz--) {
484 	  idx  = 5*(*vi++);
485 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
486 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
487 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
488 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
489 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
490 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
491 	  v   += 25;
492 	}
493 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
494 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
495 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
496 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
497 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
498         idiag   += 25;
499         i2      += 5;
500       }
501       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
502       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
503     }
504     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
505         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
506       i2    = 0;
507       mdiag = a->idiag+25*a->mbs;
508       for (i=0; i<m; i++) {
509         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
510         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
511         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
512         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
513         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
514         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
515         mdiag  += 25;
516         i2     += 5;
517       }
518       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
519     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
520       ierr = PetscMemcpy(x,b,A->m*sizeof(PetscScalar));CHKERRQ(ierr);
521     }
522     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
523       idiag   = a->idiag+25*a->mbs - 25;
524       i2      = 5*m - 5;
525       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
526       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
527       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
528       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
529       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
530       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
531       idiag -= 25;
532       i2    -= 5;
533       for (i=m-2; i>=0; i--) {
534 	v     = aa + 25*(diag[i]+1);
535 	vi    = aj + diag[i] + 1;
536 	nz    = ai[i+1] - diag[i] - 1;
537 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
538 	while (nz--) {
539 	  idx  = 5*(*vi++);
540 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
541 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
542 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
543 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
544 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
545 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
546 	  v   += 25;
547 	}
548 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
549 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
550 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
551 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
552 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
553         idiag   -= 25;
554         i2      -= 5;
555       }
556       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
557     }
558   } else {
559     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
560   }
561   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
562   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
563   PetscFunctionReturn(0);
564 }
565 
566 /*
567     Special version for direct calls from Fortran (Used in PETSc-fun3d)
568 */
569 #if defined(PETSC_HAVE_FORTRAN_CAPS)
570 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
571 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
572 #define matsetvaluesblocked4_ matsetvaluesblocked4
573 #endif
574 
575 EXTERN_C_BEGIN
576 #undef __FUNCT__
577 #define __FUNCT__ "matsetvaluesblocked4_"
578 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
579 {
580   Mat               A = *AA;
581   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
582   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
583   PetscInt          *ai=a->i,*ailen=a->ilen;
584   PetscInt          *aj=a->j,stepval,lastcol = -1;
585   const PetscScalar *value = v;
586   MatScalar         *ap,*aa = a->a,*bap;
587 
588   PetscFunctionBegin;
589   if (A->bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
590   stepval = (n-1)*4;
591   for (k=0; k<m; k++) { /* loop over added rows */
592     row  = im[k];
593     rp   = aj + ai[row];
594     ap   = aa + 16*ai[row];
595     nrow = ailen[row];
596     low  = 0;
597     high = nrow;
598     for (l=0; l<n; l++) { /* loop over added columns */
599       col = in[l];
600       if (col <= lastcol) low = 0; else high = nrow;
601       lastcol = col;
602       value = v + k*(stepval+4)*4 + l*4;
603       while (high-low > 7) {
604         t = (low+high)/2;
605         if (rp[t] > col) high = t;
606         else             low  = t;
607       }
608       for (i=low; i<high; i++) {
609         if (rp[i] > col) break;
610         if (rp[i] == col) {
611           bap  = ap +  16*i;
612           for (ii=0; ii<4; ii++,value+=stepval) {
613             for (jj=ii; jj<16; jj+=4) {
614               bap[jj] += *value++;
615             }
616           }
617           goto noinsert2;
618         }
619       }
620       N = nrow++ - 1;
621       high++; /* added new column index thus must search to one higher than before */
622       /* shift up all the later entries in this row */
623       for (ii=N; ii>=i; ii--) {
624         rp[ii+1] = rp[ii];
625         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
626       }
627       if (N >= i) {
628         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
629       }
630       rp[i] = col;
631       bap   = ap +  16*i;
632       for (ii=0; ii<4; ii++,value+=stepval) {
633         for (jj=ii; jj<16; jj+=4) {
634           bap[jj] = *value++;
635         }
636       }
637       noinsert2:;
638       low = i;
639     }
640     ailen[row] = nrow;
641   }
642   PetscFunctionReturnVoid();
643 }
644 EXTERN_C_END
645 
646 #if defined(PETSC_HAVE_FORTRAN_CAPS)
647 #define matsetvalues4_ MATSETVALUES4
648 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
649 #define matsetvalues4_ matsetvalues4
650 #endif
651 
652 EXTERN_C_BEGIN
653 #undef __FUNCT__
654 #define __FUNCT__ "MatSetValues4_"
655 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
656 {
657   Mat         A = *AA;
658   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
659   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
660   PetscInt    *ai=a->i,*ailen=a->ilen;
661   PetscInt    *aj=a->j,brow,bcol;
662   PetscInt    ridx,cidx,lastcol = -1;
663   MatScalar   *ap,value,*aa=a->a,*bap;
664 
665   PetscFunctionBegin;
666   for (k=0; k<m; k++) { /* loop over added rows */
667     row  = im[k]; brow = row/4;
668     rp   = aj + ai[brow];
669     ap   = aa + 16*ai[brow];
670     nrow = ailen[brow];
671     low  = 0;
672     high = nrow;
673     for (l=0; l<n; l++) { /* loop over added columns */
674       col = in[l]; bcol = col/4;
675       ridx = row % 4; cidx = col % 4;
676       value = v[l + k*n];
677       if (col <= lastcol) low = 0; else high = nrow;
678       lastcol = col;
679       while (high-low > 7) {
680         t = (low+high)/2;
681         if (rp[t] > bcol) high = t;
682         else              low  = t;
683       }
684       for (i=low; i<high; i++) {
685         if (rp[i] > bcol) break;
686         if (rp[i] == bcol) {
687           bap  = ap +  16*i + 4*cidx + ridx;
688           *bap += value;
689           goto noinsert1;
690         }
691       }
692       N = nrow++ - 1;
693       high++; /* added new column thus must search to one higher than before */
694       /* shift up all the later entries in this row */
695       for (ii=N; ii>=i; ii--) {
696         rp[ii+1] = rp[ii];
697         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
698       }
699       if (N>=i) {
700         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
701       }
702       rp[i]                    = bcol;
703       ap[16*i + 4*cidx + ridx] = value;
704       noinsert1:;
705       low = i;
706     }
707     ailen[brow] = nrow;
708   }
709   PetscFunctionReturnVoid();
710 }
711 EXTERN_C_END
712 
713 /*  UGLY, ugly, ugly
714    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
715    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
716    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
717    converts the entries into single precision and then calls ..._MatScalar() to put them
718    into the single precision data structures.
719 */
720 #if defined(PETSC_USE_MAT_SINGLE)
721 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
722 #else
723 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
724 #endif
725 
726 #define CHUNKSIZE  10
727 
728 /*
729      Checks for missing diagonals
730 */
731 #undef __FUNCT__
732 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
733 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
734 {
735   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
736   PetscErrorCode ierr;
737   PetscInt       *diag,*jj = a->j,i;
738 
739   PetscFunctionBegin;
740   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
741   diag = a->diag;
742   for (i=0; i<a->mbs; i++) {
743     if (jj[diag[i]] != i) {
744       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
745     }
746   }
747   PetscFunctionReturn(0);
748 }
749 
750 #undef __FUNCT__
751 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
752 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
753 {
754   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
755   PetscErrorCode ierr;
756   PetscInt       i,j,*diag,m = a->mbs;
757 
758   PetscFunctionBegin;
759   if (a->diag) PetscFunctionReturn(0);
760 
761   ierr = PetscMalloc((m+1)*sizeof(PetscInt),&diag);CHKERRQ(ierr);
762   ierr = PetscLogObjectMemory(A,(m+1)*sizeof(PetscInt));CHKERRQ(ierr);
763   for (i=0; i<m; i++) {
764     diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         diag[i] = j;
768         break;
769       }
770     }
771   }
772   a->diag = diag;
773   PetscFunctionReturn(0);
774 }
775 
776 
777 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
778 
779 #undef __FUNCT__
780 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
781 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
782 {
783   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
784   PetscErrorCode ierr;
785   PetscInt       n = a->mbs,i;
786 
787   PetscFunctionBegin;
788   *nn = n;
789   if (!ia) PetscFunctionReturn(0);
790   if (symmetric) {
791     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
792   } else if (oshift == 1) {
793     /* temporarily add 1 to i and j indices */
794     PetscInt nz = a->i[n];
795     for (i=0; i<nz; i++) a->j[i]++;
796     for (i=0; i<n+1; i++) a->i[i]++;
797     *ia = a->i; *ja = a->j;
798   } else {
799     *ia = a->i; *ja = a->j;
800   }
801 
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
807 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
808 {
809   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
810   PetscErrorCode ierr;
811   PetscInt       i,n = a->mbs;
812 
813   PetscFunctionBegin;
814   if (!ia) PetscFunctionReturn(0);
815   if (symmetric) {
816     ierr = PetscFree(*ia);CHKERRQ(ierr);
817     ierr = PetscFree(*ja);CHKERRQ(ierr);
818   } else if (oshift == 1) {
819     PetscInt nz = a->i[n]-1;
820     for (i=0; i<nz; i++) a->j[i]--;
821     for (i=0; i<n+1; i++) a->i[i]--;
822   }
823   PetscFunctionReturn(0);
824 }
825 
826 #undef __FUNCT__
827 #define __FUNCT__ "MatDestroy_SeqBAIJ"
828 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
829 {
830   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
831   PetscErrorCode ierr;
832 
833   PetscFunctionBegin;
834 #if defined(PETSC_USE_LOG)
835   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->m,A->n,a->nz);
836 #endif
837   ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,&a->a,&a->j,&a->i);CHKERRQ(ierr);
838   if (a->row) {
839     ierr = ISDestroy(a->row);CHKERRQ(ierr);
840   }
841   if (a->col) {
842     ierr = ISDestroy(a->col);CHKERRQ(ierr);
843   }
844   if (a->diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
845   if (a->idiag) {ierr = PetscFree(a->idiag);CHKERRQ(ierr);}
846   if (a->imax) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
847   if (a->solve_work) {ierr = PetscFree(a->solve_work);CHKERRQ(ierr);}
848   if (a->mult_work) {ierr = PetscFree(a->mult_work);CHKERRQ(ierr);}
849   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
850   if (a->saved_values) {ierr = PetscFree(a->saved_values);CHKERRQ(ierr);}
851 #if defined(PETSC_USE_MAT_SINGLE)
852   if (a->setvaluescopy) {ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);}
853 #endif
854   if (a->xtoy) {ierr = PetscFree(a->xtoy);CHKERRQ(ierr);}
855   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
856 
857   ierr = PetscFree(a);CHKERRQ(ierr);
858 
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetOption_SeqBAIJ"
871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872 {
873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   switch (op) {
878   case MAT_ROW_ORIENTED:
879     a->roworiented    = PETSC_TRUE;
880     break;
881   case MAT_COLUMN_ORIENTED:
882     a->roworiented    = PETSC_FALSE;
883     break;
884   case MAT_COLUMNS_SORTED:
885     a->sorted         = PETSC_TRUE;
886     break;
887   case MAT_COLUMNS_UNSORTED:
888     a->sorted         = PETSC_FALSE;
889     break;
890   case MAT_KEEP_ZEROED_ROWS:
891     a->keepzeroedrows = PETSC_TRUE;
892     break;
893   case MAT_NO_NEW_NONZERO_LOCATIONS:
894     a->nonew          = 1;
895     break;
896   case MAT_NEW_NONZERO_LOCATION_ERR:
897     a->nonew          = -1;
898     break;
899   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900     a->nonew          = -2;
901     break;
902   case MAT_YES_NEW_NONZERO_LOCATIONS:
903     a->nonew          = 0;
904     break;
905   case MAT_ROWS_SORTED:
906   case MAT_ROWS_UNSORTED:
907   case MAT_YES_NEW_DIAGONALS:
908   case MAT_IGNORE_OFF_PROC_ENTRIES:
909   case MAT_USE_HASH_TABLE:
910     ierr = PetscVerboseInfo((A,"MatSetOption_SeqBAIJ:Option ignored\n"));CHKERRQ(ierr);
911     break;
912   case MAT_NO_NEW_DIAGONALS:
913     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914   case MAT_SYMMETRIC:
915   case MAT_STRUCTURALLY_SYMMETRIC:
916   case MAT_NOT_SYMMETRIC:
917   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918   case MAT_HERMITIAN:
919   case MAT_NOT_HERMITIAN:
920   case MAT_SYMMETRY_ETERNAL:
921   case MAT_NOT_SYMMETRY_ETERNAL:
922     break;
923   default:
924     SETERRQ(PETSC_ERR_SUP,"unknown option");
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatGetRow_SeqBAIJ"
931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
932 {
933   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
934   PetscErrorCode ierr;
935   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
936   MatScalar      *aa,*aa_i;
937   PetscScalar    *v_i;
938 
939   PetscFunctionBegin;
940   bs  = A->bs;
941   ai  = a->i;
942   aj  = a->j;
943   aa  = a->a;
944   bs2 = a->bs2;
945 
946   if (row < 0 || row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
947 
948   bn  = row/bs;   /* Block number */
949   bp  = row % bs; /* Block Position */
950   M   = ai[bn+1] - ai[bn];
951   *nz = bs*M;
952 
953   if (v) {
954     *v = 0;
955     if (*nz) {
956       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
957       for (i=0; i<M; i++) { /* for each block in the block row */
958         v_i  = *v + i*bs;
959         aa_i = aa + bs2*(ai[bn] + i);
960         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
961       }
962     }
963   }
964 
965   if (idx) {
966     *idx = 0;
967     if (*nz) {
968       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
969       for (i=0; i<M; i++) { /* for each block in the block row */
970         idx_i = *idx + i*bs;
971         itmp  = bs*aj[ai[bn] + i];
972         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
973       }
974     }
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
982 {
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   if (idx) {if (*idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}}
987   if (v)   {if (*v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}}
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatTranspose_SeqBAIJ"
993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
994 {
995   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
996   Mat            C;
997   PetscErrorCode ierr;
998   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
999   PetscInt       *rows,*cols,bs2=a->bs2;
1000   PetscScalar    *array;
1001 
1002   PetscFunctionBegin;
1003   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1004   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1005   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1006 
1007 #if defined(PETSC_USE_MAT_SINGLE)
1008   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1009   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1010 #else
1011   array = a->a;
1012 #endif
1013 
1014   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1015   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1016   ierr = MatSetSizes(C,A->n,A->m,A->n,A->m);CHKERRQ(ierr);
1017   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1018   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1019   ierr = PetscFree(col);CHKERRQ(ierr);
1020   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1021   cols = rows + bs;
1022   for (i=0; i<mbs; i++) {
1023     cols[0] = i*bs;
1024     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1025     len = ai[i+1] - ai[i];
1026     for (j=0; j<len; j++) {
1027       rows[0] = (*aj++)*bs;
1028       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1029       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1030       array += bs2;
1031     }
1032   }
1033   ierr = PetscFree(rows);CHKERRQ(ierr);
1034 #if defined(PETSC_USE_MAT_SINGLE)
1035   ierr = PetscFree(array);
1036 #endif
1037 
1038   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040 
1041   if (B) {
1042     *B = C;
1043   } else {
1044     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1045   }
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1051 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1052 {
1053   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1054   PetscErrorCode ierr;
1055   PetscInt       i,*col_lens,bs = A->bs,count,*jj,j,k,l,bs2=a->bs2;
1056   int            fd;
1057   PetscScalar    *aa;
1058   FILE           *file;
1059 
1060   PetscFunctionBegin;
1061   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1062   ierr        = PetscMalloc((4+A->m)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1063   col_lens[0] = MAT_FILE_COOKIE;
1064 
1065   col_lens[1] = A->m;
1066   col_lens[2] = A->n;
1067   col_lens[3] = a->nz*bs2;
1068 
1069   /* store lengths of each row and write (including header) to file */
1070   count = 0;
1071   for (i=0; i<a->mbs; i++) {
1072     for (j=0; j<bs; j++) {
1073       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1074     }
1075   }
1076   ierr = PetscBinaryWrite(fd,col_lens,4+A->m,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1077   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1078 
1079   /* store column indices (zero start index) */
1080   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1081   count = 0;
1082   for (i=0; i<a->mbs; i++) {
1083     for (j=0; j<bs; j++) {
1084       for (k=a->i[i]; k<a->i[i+1]; k++) {
1085         for (l=0; l<bs; l++) {
1086           jj[count++] = bs*a->j[k] + l;
1087         }
1088       }
1089     }
1090   }
1091   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1092   ierr = PetscFree(jj);CHKERRQ(ierr);
1093 
1094   /* store nonzero values */
1095   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1096   count = 0;
1097   for (i=0; i<a->mbs; i++) {
1098     for (j=0; j<bs; j++) {
1099       for (k=a->i[i]; k<a->i[i+1]; k++) {
1100         for (l=0; l<bs; l++) {
1101           aa[count++] = a->a[bs2*k + l*bs + j];
1102         }
1103       }
1104     }
1105   }
1106   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1107   ierr = PetscFree(aa);CHKERRQ(ierr);
1108 
1109   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1110   if (file) {
1111     fprintf(file,"-matload_block_size %d\n",(int)A->bs);
1112   }
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1118 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1119 {
1120   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1121   PetscErrorCode    ierr;
1122   PetscInt          i,j,bs = A->bs,k,l,bs2=a->bs2;
1123   PetscViewerFormat format;
1124 
1125   PetscFunctionBegin;
1126   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1127   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1128     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1129   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1130     Mat aij;
1131     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1132     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1133     ierr = MatDestroy(aij);CHKERRQ(ierr);
1134   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1135      PetscFunctionReturn(0);
1136   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1137     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1138     for (i=0; i<a->mbs; i++) {
1139       for (j=0; j<bs; j++) {
1140         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1141         for (k=a->i[i]; k<a->i[i+1]; k++) {
1142           for (l=0; l<bs; l++) {
1143 #if defined(PETSC_USE_COMPLEX)
1144             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1145               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1146                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1147             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1149                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1151               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1152             }
1153 #else
1154             if (a->a[bs2*k + l*bs + j] != 0.0) {
1155               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1156             }
1157 #endif
1158           }
1159         }
1160         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1161       }
1162     }
1163     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1164   } else {
1165     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1166     for (i=0; i<a->mbs; i++) {
1167       for (j=0; j<bs; j++) {
1168         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1169         for (k=a->i[i]; k<a->i[i+1]; k++) {
1170           for (l=0; l<bs; l++) {
1171 #if defined(PETSC_USE_COMPLEX)
1172             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1173               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1174                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1175             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1176               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1177                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1178             } else {
1179               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1180             }
1181 #else
1182             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1183 #endif
1184           }
1185         }
1186         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1187       }
1188     }
1189     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1190   }
1191   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1197 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1198 {
1199   Mat            A = (Mat) Aa;
1200   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1201   PetscErrorCode ierr;
1202   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->bs,bs2=a->bs2;
1203   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1204   MatScalar      *aa;
1205   PetscViewer    viewer;
1206 
1207   PetscFunctionBegin;
1208 
1209   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1210   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1211 
1212   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1213 
1214   /* loop over matrix elements drawing boxes */
1215   color = PETSC_DRAW_BLUE;
1216   for (i=0,row=0; i<mbs; i++,row+=bs) {
1217     for (j=a->i[i]; j<a->i[i+1]; j++) {
1218       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1219       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1220       aa = a->a + j*bs2;
1221       for (k=0; k<bs; k++) {
1222         for (l=0; l<bs; l++) {
1223           if (PetscRealPart(*aa++) >=  0.) continue;
1224           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1225         }
1226       }
1227     }
1228   }
1229   color = PETSC_DRAW_CYAN;
1230   for (i=0,row=0; i<mbs; i++,row+=bs) {
1231     for (j=a->i[i]; j<a->i[i+1]; j++) {
1232       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1233       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1234       aa = a->a + j*bs2;
1235       for (k=0; k<bs; k++) {
1236         for (l=0; l<bs; l++) {
1237           if (PetscRealPart(*aa++) != 0.) continue;
1238           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1239         }
1240       }
1241     }
1242   }
1243 
1244   color = PETSC_DRAW_RED;
1245   for (i=0,row=0; i<mbs; i++,row+=bs) {
1246     for (j=a->i[i]; j<a->i[i+1]; j++) {
1247       y_l = A->m - row - 1.0; y_r = y_l + 1.0;
1248       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1249       aa = a->a + j*bs2;
1250       for (k=0; k<bs; k++) {
1251         for (l=0; l<bs; l++) {
1252           if (PetscRealPart(*aa++) <= 0.) continue;
1253           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1254         }
1255       }
1256     }
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1263 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1264 {
1265   PetscErrorCode ierr;
1266   PetscReal      xl,yl,xr,yr,w,h;
1267   PetscDraw      draw;
1268   PetscTruth     isnull;
1269 
1270   PetscFunctionBegin;
1271 
1272   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1273   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1274 
1275   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1276   xr  = A->n; yr = A->m; h = yr/10.0; w = xr/10.0;
1277   xr += w;    yr += h;  xl = -w;     yl = -h;
1278   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1279   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1280   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatView_SeqBAIJ"
1286 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1287 {
1288   PetscErrorCode ierr;
1289   PetscTruth     iascii,isbinary,isdraw;
1290 
1291   PetscFunctionBegin;
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1295   if (iascii){
1296     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1297   } else if (isbinary) {
1298     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1299   } else if (isdraw) {
1300     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1301   } else {
1302     Mat B;
1303     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1304     ierr = MatView(B,viewer);CHKERRQ(ierr);
1305     ierr = MatDestroy(B);CHKERRQ(ierr);
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1313 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1314 {
1315   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1316   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1317   PetscInt    *ai = a->i,*ailen = a->ilen;
1318   PetscInt    brow,bcol,ridx,cidx,bs=A->bs,bs2=a->bs2;
1319   MatScalar   *ap,*aa = a->a,zero = 0.0;
1320 
1321   PetscFunctionBegin;
1322   for (k=0; k<m; k++) { /* loop over rows */
1323     row  = im[k]; brow = row/bs;
1324     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1325     if (row >= A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1326     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1327     nrow = ailen[brow];
1328     for (l=0; l<n; l++) { /* loop over columns */
1329       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1330       if (in[l] >= A->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1331       col  = in[l] ;
1332       bcol = col/bs;
1333       cidx = col%bs;
1334       ridx = row%bs;
1335       high = nrow;
1336       low  = 0; /* assume unsorted */
1337       while (high-low > 5) {
1338         t = (low+high)/2;
1339         if (rp[t] > bcol) high = t;
1340         else             low  = t;
1341       }
1342       for (i=low; i<high; i++) {
1343         if (rp[i] > bcol) break;
1344         if (rp[i] == bcol) {
1345           *v++ = ap[bs2*i+bs*cidx+ridx];
1346           goto finished;
1347         }
1348       }
1349       *v++ = zero;
1350       finished:;
1351     }
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #if defined(PETSC_USE_MAT_SINGLE)
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1359 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1360 {
1361   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1362   PetscErrorCode ierr;
1363   PetscInt       i,N = m*n*b->bs2;
1364   MatScalar      *vsingle;
1365 
1366   PetscFunctionBegin;
1367   if (N > b->setvalueslen) {
1368     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
1369     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1370     b->setvalueslen  = N;
1371   }
1372   vsingle = b->setvaluescopy;
1373   for (i=0; i<N; i++) {
1374     vsingle[i] = v[i];
1375   }
1376   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 #endif
1380 
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1384 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1385 {
1386   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1387   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1388   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1389   PetscErrorCode  ierr;
1390   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->bs,stepval;
1391   PetscTruth      roworiented=a->roworiented;
1392   const MatScalar *value = v;
1393   MatScalar       *ap,*aa = a->a,*bap;
1394 
1395   PetscFunctionBegin;
1396   if (roworiented) {
1397     stepval = (n-1)*bs;
1398   } else {
1399     stepval = (m-1)*bs;
1400   }
1401   for (k=0; k<m; k++) { /* loop over added rows */
1402     row  = im[k];
1403     if (row < 0) continue;
1404 #if defined(PETSC_USE_DEBUG)
1405     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1406 #endif
1407     rp   = aj + ai[row];
1408     ap   = aa + bs2*ai[row];
1409     rmax = imax[row];
1410     nrow = ailen[row];
1411     low  = 0;
1412     high = nrow;
1413     for (l=0; l<n; l++) { /* loop over added columns */
1414       if (in[l] < 0) continue;
1415 #if defined(PETSC_USE_DEBUG)
1416       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1417 #endif
1418       col = in[l];
1419       if (roworiented) {
1420         value = v + k*(stepval+bs)*bs + l*bs;
1421       } else {
1422         value = v + l*(stepval+bs)*bs + k*bs;
1423       }
1424       if (col <= lastcol) low = 0; else high = nrow;
1425       lastcol = col;
1426       while (high-low > 7) {
1427         t = (low+high)/2;
1428         if (rp[t] > col) high = t;
1429         else             low  = t;
1430       }
1431       for (i=low; i<high; i++) {
1432         if (rp[i] > col) break;
1433         if (rp[i] == col) {
1434           bap  = ap +  bs2*i;
1435           if (roworiented) {
1436             if (is == ADD_VALUES) {
1437               for (ii=0; ii<bs; ii++,value+=stepval) {
1438                 for (jj=ii; jj<bs2; jj+=bs) {
1439                   bap[jj] += *value++;
1440                 }
1441               }
1442             } else {
1443               for (ii=0; ii<bs; ii++,value+=stepval) {
1444                 for (jj=ii; jj<bs2; jj+=bs) {
1445                   bap[jj] = *value++;
1446                 }
1447               }
1448             }
1449           } else {
1450             if (is == ADD_VALUES) {
1451               for (ii=0; ii<bs; ii++,value+=stepval) {
1452                 for (jj=0; jj<bs; jj++) {
1453                   *bap++ += *value++;
1454                 }
1455               }
1456             } else {
1457               for (ii=0; ii<bs; ii++,value+=stepval) {
1458                 for (jj=0; jj<bs; jj++) {
1459                   *bap++  = *value++;
1460                 }
1461               }
1462             }
1463           }
1464           goto noinsert2;
1465         }
1466       }
1467       if (nonew == 1) goto noinsert2;
1468       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1469       MatSeqXAIJReallocateAIJ(a,bs2,nrow,row,col,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1470       N = nrow++ - 1; high++;
1471       /* shift up all the later entries in this row */
1472       for (ii=N; ii>=i; ii--) {
1473         rp[ii+1] = rp[ii];
1474         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1475       }
1476       if (N >= i) {
1477         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1478       }
1479       rp[i] = col;
1480       bap   = ap +  bs2*i;
1481       if (roworiented) {
1482         for (ii=0; ii<bs; ii++,value+=stepval) {
1483           for (jj=ii; jj<bs2; jj+=bs) {
1484             bap[jj] = *value++;
1485           }
1486         }
1487       } else {
1488         for (ii=0; ii<bs; ii++,value+=stepval) {
1489           for (jj=0; jj<bs; jj++) {
1490             *bap++  = *value++;
1491           }
1492         }
1493       }
1494       noinsert2:;
1495       low = i;
1496     }
1497     ailen[row] = nrow;
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1504 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1505 {
1506   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1507   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1508   PetscInt       m = A->m,*ip,N,*ailen = a->ilen;
1509   PetscErrorCode ierr;
1510   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1511   MatScalar      *aa = a->a,*ap;
1512   PetscReal      ratio=0.6;
1513 
1514   PetscFunctionBegin;
1515   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1516 
1517   if (m) rmax = ailen[0];
1518   for (i=1; i<mbs; i++) {
1519     /* move each row back by the amount of empty slots (fshift) before it*/
1520     fshift += imax[i-1] - ailen[i-1];
1521     rmax   = PetscMax(rmax,ailen[i]);
1522     if (fshift) {
1523       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1524       N = ailen[i];
1525       for (j=0; j<N; j++) {
1526         ip[j-fshift] = ip[j];
1527         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1528       }
1529     }
1530     ai[i] = ai[i-1] + ailen[i-1];
1531   }
1532   if (mbs) {
1533     fshift += imax[mbs-1] - ailen[mbs-1];
1534     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1535   }
1536   /* reset ilen and imax for each row */
1537   for (i=0; i<mbs; i++) {
1538     ailen[i] = imax[i] = ai[i+1] - ai[i];
1539   }
1540   a->nz = ai[mbs];
1541 
1542   /* diagonals may have moved, so kill the diagonal pointers */
1543   a->idiagvalid = PETSC_FALSE;
1544   if (fshift && a->diag) {
1545     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1546     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1547     a->diag = 0;
1548   }
1549   ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqBAIJ:Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->n,A->bs,fshift*bs2,a->nz*bs2));CHKERRQ(ierr);
1550   ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqBAIJ:Number of mallocs during MatSetValues is %D\n",a->reallocs));CHKERRQ(ierr);
1551   ierr = PetscVerboseInfo((A,"MatAssemblyEnd_SeqBAIJ:Most nonzeros blocks in any row is %D\n",rmax));CHKERRQ(ierr);
1552   a->reallocs          = 0;
1553   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1554 
1555   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1556   if (a->compressedrow.use){
1557     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1558   }
1559 
1560   A->same_nonzero = PETSC_TRUE;
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 /*
1565    This function returns an array of flags which indicate the locations of contiguous
1566    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1567    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1568    Assume: sizes should be long enough to hold all the values.
1569 */
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1573 {
1574   PetscInt   i,j,k,row;
1575   PetscTruth flg;
1576 
1577   PetscFunctionBegin;
1578   for (i=0,j=0; i<n; j++) {
1579     row = idx[i];
1580     if (row%bs!=0) { /* Not the begining of a block */
1581       sizes[j] = 1;
1582       i++;
1583     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1584       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1585       i++;
1586     } else { /* Begining of the block, so check if the complete block exists */
1587       flg = PETSC_TRUE;
1588       for (k=1; k<bs; k++) {
1589         if (row+k != idx[i+k]) { /* break in the block */
1590           flg = PETSC_FALSE;
1591           break;
1592         }
1593       }
1594       if (flg) { /* No break in the bs */
1595         sizes[j] = bs;
1596         i+= bs;
1597       } else {
1598         sizes[j] = 1;
1599         i++;
1600       }
1601     }
1602   }
1603   *bs_max = j;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1610 {
1611   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1612   PetscErrorCode ierr;
1613   PetscInt       i,j,k,count,*rows;
1614   PetscInt       bs=A->bs,bs2=baij->bs2,*sizes,row,bs_max;
1615   PetscScalar    zero = 0.0;
1616   MatScalar      *aa;
1617 
1618   PetscFunctionBegin;
1619   /* Make a copy of the IS and  sort it */
1620   /* allocate memory for rows,sizes */
1621   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1622   sizes = rows + is_n;
1623 
1624   /* copy IS values to rows, and sort them */
1625   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1626   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1627   if (baij->keepzeroedrows) {
1628     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1629     bs_max = is_n;
1630     A->same_nonzero = PETSC_TRUE;
1631   } else {
1632     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1633     A->same_nonzero = PETSC_FALSE;
1634   }
1635 
1636   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1637     row   = rows[j];
1638     if (row < 0 || row > A->m) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1639     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1640     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1641     if (sizes[i] == bs && !baij->keepzeroedrows) {
1642       if (diag != 0.0) {
1643         if (baij->ilen[row/bs] > 0) {
1644           baij->ilen[row/bs]       = 1;
1645           baij->j[baij->i[row/bs]] = row/bs;
1646           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1647         }
1648         /* Now insert all the diagonal values for this bs */
1649         for (k=0; k<bs; k++) {
1650           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1651         }
1652       } else { /* (diag == 0.0) */
1653         baij->ilen[row/bs] = 0;
1654       } /* end (diag == 0.0) */
1655     } else { /* (sizes[i] != bs) */
1656 #if defined (PETSC_USE_DEBUG)
1657       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1658 #endif
1659       for (k=0; k<count; k++) {
1660         aa[0] =  zero;
1661         aa    += bs;
1662       }
1663       if (diag != 0.0) {
1664         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665       }
1666     }
1667   }
1668 
1669   ierr = PetscFree(rows);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1677 {
1678   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1679   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1680   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1681   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->bs,brow,bcol;
1682   PetscErrorCode ierr;
1683   PetscInt       ridx,cidx,bs2=a->bs2;
1684   PetscTruth     roworiented=a->roworiented;
1685   MatScalar      *ap,value,*aa=a->a,*bap;
1686 
1687   PetscFunctionBegin;
1688   for (k=0; k<m; k++) { /* loop over added rows */
1689     row  = im[k];
1690     brow = row/bs;
1691     if (row < 0) continue;
1692 #if defined(PETSC_USE_DEBUG)
1693     if (row >= A->m) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->m-1);
1694 #endif
1695     rp   = aj + ai[brow];
1696     ap   = aa + bs2*ai[brow];
1697     rmax = imax[brow];
1698     nrow = ailen[brow];
1699     low  = 0;
1700     high = nrow;
1701     for (l=0; l<n; l++) { /* loop over added columns */
1702       if (in[l] < 0) continue;
1703 #if defined(PETSC_USE_DEBUG)
1704       if (in[l] >= A->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->n-1);
1705 #endif
1706       col = in[l]; bcol = col/bs;
1707       ridx = row % bs; cidx = col % bs;
1708       if (roworiented) {
1709         value = v[l + k*n];
1710       } else {
1711         value = v[k + l*m];
1712       }
1713       if (col <= lastcol) low = 0; else high = nrow;
1714       lastcol = col;
1715       while (high-low > 7) {
1716         t = (low+high)/2;
1717         if (rp[t] > bcol) high = t;
1718         else              low  = t;
1719       }
1720       for (i=low; i<high; i++) {
1721         if (rp[i] > bcol) break;
1722         if (rp[i] == bcol) {
1723           bap  = ap +  bs2*i + bs*cidx + ridx;
1724           if (is == ADD_VALUES) *bap += value;
1725           else                  *bap  = value;
1726           goto noinsert1;
1727         }
1728       }
1729       if (nonew == 1) goto noinsert1;
1730       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1731       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,imax,nonew);
1732       N = nrow++ - 1; high++;
1733       /* shift up all the later entries in this row */
1734       for (ii=N; ii>=i; ii--) {
1735         rp[ii+1] = rp[ii];
1736         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1737       }
1738       if (N>=i) {
1739         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1740       }
1741       rp[i]                      = bcol;
1742       ap[bs2*i + bs*cidx + ridx] = value;
1743       a->nz++;
1744       noinsert1:;
1745       low = i;
1746     }
1747     ailen[brow] = nrow;
1748   }
1749   A->same_nonzero = PETSC_FALSE;
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1756 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1757 {
1758   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1759   Mat            outA;
1760   PetscErrorCode ierr;
1761   PetscTruth     row_identity,col_identity;
1762 
1763   PetscFunctionBegin;
1764   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1765   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1766   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1767   if (!row_identity || !col_identity) {
1768     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1769   }
1770 
1771   outA          = inA;
1772   inA->factor   = FACTOR_LU;
1773 
1774   if (!a->diag) {
1775     ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1776   }
1777 
1778   a->row        = row;
1779   a->col        = col;
1780   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1781   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1782 
1783   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1784   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1785   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1786 
1787   /*
1788       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1789       for ILU(0) factorization with natural ordering
1790   */
1791   if (inA->bs < 8) {
1792     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1793   } else {
1794     if (!a->solve_work) {
1795       ierr = PetscMalloc((inA->m+inA->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1796       ierr = PetscLogObjectMemory(inA,(inA->m+inA->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1797     }
1798   }
1799 
1800   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1801 
1802   PetscFunctionReturn(0);
1803 }
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatPrintHelp_SeqBAIJ"
1806 PetscErrorCode MatPrintHelp_SeqBAIJ(Mat A)
1807 {
1808   static PetscTruth called = PETSC_FALSE;
1809   MPI_Comm          comm = A->comm;
1810   PetscErrorCode    ierr;
1811 
1812   PetscFunctionBegin;
1813   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1814   ierr = (*PetscHelpPrintf)(comm," Options for MATSEQBAIJ and MATMPIBAIJ matrix formats (the defaults):\n");CHKERRQ(ierr);
1815   ierr = (*PetscHelpPrintf)(comm,"  -mat_block_size <block_size>\n");CHKERRQ(ierr);
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 EXTERN_C_BEGIN
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1822 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1823 {
1824   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1825   PetscInt    i,nz,nbs;
1826 
1827   PetscFunctionBegin;
1828   nz  = baij->maxnz/baij->bs2;
1829   nbs = baij->nbs;
1830   for (i=0; i<nz; i++) {
1831     baij->j[i] = indices[i];
1832   }
1833   baij->nz = nz;
1834   for (i=0; i<nbs; i++) {
1835     baij->ilen[i] = baij->imax[i];
1836   }
1837 
1838   PetscFunctionReturn(0);
1839 }
1840 EXTERN_C_END
1841 
1842 #undef __FUNCT__
1843 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1844 /*@
1845     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1846        in the matrix.
1847 
1848   Input Parameters:
1849 +  mat - the SeqBAIJ matrix
1850 -  indices - the column indices
1851 
1852   Level: advanced
1853 
1854   Notes:
1855     This can be called if you have precomputed the nonzero structure of the
1856   matrix and want to provide it to the matrix object to improve the performance
1857   of the MatSetValues() operation.
1858 
1859     You MUST have set the correct numbers of nonzeros per row in the call to
1860   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1861 
1862     MUST be called before any calls to MatSetValues();
1863 
1864 @*/
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1866 {
1867   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1868 
1869   PetscFunctionBegin;
1870   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1871   PetscValidPointer(indices,2);
1872   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1873   if (f) {
1874     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1875   } else {
1876     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1877   }
1878   PetscFunctionReturn(0);
1879 }
1880 
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1883 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1884 {
1885   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1886   PetscErrorCode ierr;
1887   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1888   PetscReal      atmp;
1889   PetscScalar    *x,zero = 0.0;
1890   MatScalar      *aa;
1891   PetscInt       ncols,brow,krow,kcol;
1892 
1893   PetscFunctionBegin;
1894   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1895   bs   = A->bs;
1896   aa   = a->a;
1897   ai   = a->i;
1898   aj   = a->j;
1899   mbs = a->mbs;
1900 
1901   ierr = VecSet(v,zero);CHKERRQ(ierr);
1902   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1903   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1904   if (n != A->m) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1905   for (i=0; i<mbs; i++) {
1906     ncols = ai[1] - ai[0]; ai++;
1907     brow  = bs*i;
1908     for (j=0; j<ncols; j++){
1909       /* bcol = bs*(*aj); */
1910       for (kcol=0; kcol<bs; kcol++){
1911         for (krow=0; krow<bs; krow++){
1912           atmp = PetscAbsScalar(*aa); aa++;
1913           row = brow + krow;    /* row index */
1914           /* printf("val[%d,%d]: %g\n",row,bcol+kcol,atmp); */
1915           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1916         }
1917       }
1918       aj++;
1919     }
1920   }
1921   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1922   PetscFunctionReturn(0);
1923 }
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatCopy_SeqBAIJ"
1927 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   /* If the two matrices have the same copy implementation, use fast copy. */
1933   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1934     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1935     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1936 
1937     if (a->i[A->m] != b->i[B->m]) {
1938       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1939     }
1940     ierr = PetscMemcpy(b->a,a->a,(a->i[A->m])*sizeof(PetscScalar));CHKERRQ(ierr);
1941   } else {
1942     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 #undef __FUNCT__
1948 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1949 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1950 {
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,1,PETSC_DEFAULT,0);CHKERRQ(ierr);
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 #undef __FUNCT__
1959 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1960 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1961 {
1962   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1963   PetscFunctionBegin;
1964   *array = a->a;
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 #undef __FUNCT__
1969 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1970 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1971 {
1972   PetscFunctionBegin;
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 #include "petscblaslapack.h"
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1979 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1980 {
1981   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1982   PetscErrorCode ierr;
1983   PetscInt       i,bs=Y->bs,j,bs2;
1984   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1985 
1986   PetscFunctionBegin;
1987   if (str == SAME_NONZERO_PATTERN) {
1988     PetscScalar alpha = a;
1989     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1990   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1991     if (y->xtoy && y->XtoY != X) {
1992       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1993       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1994     }
1995     if (!y->xtoy) { /* get xtoy */
1996       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1997       y->XtoY = X;
1998     }
1999     bs2 = bs*bs;
2000     for (i=0; i<x->nz; i++) {
2001       j = 0;
2002       while (j < bs2){
2003         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2004         j++;
2005       }
2006     }
2007     ierr = PetscVerboseInfo((0,"MatAXPY_SeqBAIJ: ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2008   } else {
2009     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2010   }
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2016 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2017 {
2018   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2019   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2020   PetscScalar    *aa = a->a;
2021 
2022   PetscFunctionBegin;
2023   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2029 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2030 {
2031   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2032   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2033   PetscScalar    *aa = a->a;
2034 
2035   PetscFunctionBegin;
2036   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 
2041 /* -------------------------------------------------------------------*/
2042 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2043        MatGetRow_SeqBAIJ,
2044        MatRestoreRow_SeqBAIJ,
2045        MatMult_SeqBAIJ_N,
2046 /* 4*/ MatMultAdd_SeqBAIJ_N,
2047        MatMultTranspose_SeqBAIJ,
2048        MatMultTransposeAdd_SeqBAIJ,
2049        MatSolve_SeqBAIJ_N,
2050        0,
2051        0,
2052 /*10*/ 0,
2053        MatLUFactor_SeqBAIJ,
2054        0,
2055        0,
2056        MatTranspose_SeqBAIJ,
2057 /*15*/ MatGetInfo_SeqBAIJ,
2058        MatEqual_SeqBAIJ,
2059        MatGetDiagonal_SeqBAIJ,
2060        MatDiagonalScale_SeqBAIJ,
2061        MatNorm_SeqBAIJ,
2062 /*20*/ 0,
2063        MatAssemblyEnd_SeqBAIJ,
2064        0,
2065        MatSetOption_SeqBAIJ,
2066        MatZeroEntries_SeqBAIJ,
2067 /*25*/ MatZeroRows_SeqBAIJ,
2068        MatLUFactorSymbolic_SeqBAIJ,
2069        MatLUFactorNumeric_SeqBAIJ_N,
2070        MatCholeskyFactorSymbolic_SeqBAIJ,
2071        MatCholeskyFactorNumeric_SeqBAIJ_N,
2072 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2073        MatILUFactorSymbolic_SeqBAIJ,
2074        MatICCFactorSymbolic_SeqBAIJ,
2075        MatGetArray_SeqBAIJ,
2076        MatRestoreArray_SeqBAIJ,
2077 /*35*/ MatDuplicate_SeqBAIJ,
2078        0,
2079        0,
2080        MatILUFactor_SeqBAIJ,
2081        0,
2082 /*40*/ MatAXPY_SeqBAIJ,
2083        MatGetSubMatrices_SeqBAIJ,
2084        MatIncreaseOverlap_SeqBAIJ,
2085        MatGetValues_SeqBAIJ,
2086        MatCopy_SeqBAIJ,
2087 /*45*/ MatPrintHelp_SeqBAIJ,
2088        MatScale_SeqBAIJ,
2089        0,
2090        0,
2091        0,
2092 /*50*/ 0,
2093        MatGetRowIJ_SeqBAIJ,
2094        MatRestoreRowIJ_SeqBAIJ,
2095        0,
2096        0,
2097 /*55*/ 0,
2098        0,
2099        0,
2100        0,
2101        MatSetValuesBlocked_SeqBAIJ,
2102 /*60*/ MatGetSubMatrix_SeqBAIJ,
2103        MatDestroy_SeqBAIJ,
2104        MatView_SeqBAIJ,
2105        MatGetPetscMaps_Petsc,
2106        0,
2107 /*65*/ 0,
2108        0,
2109        0,
2110        0,
2111        0,
2112 /*70*/ MatGetRowMax_SeqBAIJ,
2113        MatConvert_Basic,
2114        0,
2115        0,
2116        0,
2117 /*75*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*80*/ 0,
2123        0,
2124        0,
2125        0,
2126        MatLoad_SeqBAIJ,
2127 /*85*/ 0,
2128        0,
2129        0,
2130        0,
2131        0,
2132 /*90*/ 0,
2133        0,
2134        0,
2135        0,
2136        0,
2137 /*95*/ 0,
2138        0,
2139        0,
2140        0,
2141        0,
2142 /*100*/0,
2143        0,
2144        0,
2145        0,
2146        0,
2147 /*105*/0,
2148        MatRealPart_SeqBAIJ,
2149        MatImaginaryPart_SeqBAIJ
2150 };
2151 
2152 EXTERN_C_BEGIN
2153 #undef __FUNCT__
2154 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2155 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2156 {
2157   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2158   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2159   PetscErrorCode ierr;
2160 
2161   PetscFunctionBegin;
2162   if (aij->nonew != 1) {
2163     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2164   }
2165 
2166   /* allocate space for values if not already there */
2167   if (!aij->saved_values) {
2168     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2169   }
2170 
2171   /* copy values over */
2172   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2173   PetscFunctionReturn(0);
2174 }
2175 EXTERN_C_END
2176 
2177 EXTERN_C_BEGIN
2178 #undef __FUNCT__
2179 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2180 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2181 {
2182   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2183   PetscErrorCode ierr;
2184   PetscInt       nz = aij->i[mat->m]*mat->bs*aij->bs2;
2185 
2186   PetscFunctionBegin;
2187   if (aij->nonew != 1) {
2188     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2189   }
2190   if (!aij->saved_values) {
2191     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2192   }
2193 
2194   /* copy values over */
2195   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2196   PetscFunctionReturn(0);
2197 }
2198 EXTERN_C_END
2199 
2200 EXTERN_C_BEGIN
2201 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2202 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2203 EXTERN_C_END
2204 
2205 EXTERN_C_BEGIN
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2208 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2209 {
2210   Mat_SeqBAIJ    *b;
2211   PetscErrorCode ierr;
2212   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2213   PetscTruth     flg,skipallocation = PETSC_FALSE;
2214 
2215   PetscFunctionBegin;
2216 
2217   if (nz == MAT_SKIP_ALLOCATION) {
2218     skipallocation = PETSC_TRUE;
2219     nz             = 0;
2220   }
2221 
2222   B->preallocated = PETSC_TRUE;
2223   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&newbs,PETSC_NULL);CHKERRQ(ierr);
2224   if (nnz && newbs != bs) {
2225     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2226   }
2227   bs   = newbs;
2228 
2229   mbs  = B->m/bs;
2230   nbs  = B->n/bs;
2231   bs2  = bs*bs;
2232 
2233   if (mbs*bs!=B->m || nbs*bs!=B->n) {
2234     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->m,B->n,bs);
2235   }
2236 
2237   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2238   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2239   if (nnz) {
2240     for (i=0; i<mbs; i++) {
2241       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2242       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2243     }
2244   }
2245 
2246   b       = (Mat_SeqBAIJ*)B->data;
2247   ierr    = PetscOptionsHasName(PETSC_NULL,"-mat_no_unroll",&flg);CHKERRQ(ierr);
2248   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2249   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2250   if (!flg) {
2251     switch (bs) {
2252     case 1:
2253       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2254       B->ops->mult            = MatMult_SeqBAIJ_1;
2255       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2256       break;
2257     case 2:
2258       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2259       B->ops->mult            = MatMult_SeqBAIJ_2;
2260       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2261       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2262       break;
2263     case 3:
2264       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2265       B->ops->mult            = MatMult_SeqBAIJ_3;
2266       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2267       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2268       break;
2269     case 4:
2270       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2271       B->ops->mult            = MatMult_SeqBAIJ_4;
2272       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2273       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2274       break;
2275     case 5:
2276       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2277       B->ops->mult            = MatMult_SeqBAIJ_5;
2278       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2279       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2280       break;
2281     case 6:
2282       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2283       B->ops->mult            = MatMult_SeqBAIJ_6;
2284       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2285       break;
2286     case 7:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2288       B->ops->mult            = MatMult_SeqBAIJ_7;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2290       break;
2291     default:
2292       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2293       B->ops->mult            = MatMult_SeqBAIJ_N;
2294       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2295       break;
2296     }
2297   }
2298   B->bs      = bs;
2299   b->mbs     = mbs;
2300   b->nbs     = nbs;
2301   if (!skipallocation) {
2302     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2303     /* b->ilen will count nonzeros in each block row so far. */
2304     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2305     if (!nnz) {
2306       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2307       else if (nz <= 0)        nz = 1;
2308       for (i=0; i<mbs; i++) b->imax[i] = nz;
2309       nz = nz*mbs;
2310     } else {
2311       nz = 0;
2312       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2313     }
2314 
2315     /* allocate the matrix space */
2316     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->m+1,PetscInt,&b->i);CHKERRQ(ierr);
2317     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2318     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2319     b->singlemalloc = PETSC_TRUE;
2320 
2321     b->i[0] = 0;
2322     for (i=1; i<mbs+1; i++) {
2323       b->i[i] = b->i[i-1] + b->imax[i-1];
2324     }
2325   }
2326 
2327   B->bs               = bs;
2328   b->bs2              = bs2;
2329   b->mbs              = mbs;
2330   b->nz               = 0;
2331   b->maxnz            = nz*bs2;
2332   B->info.nz_unneeded = (PetscReal)b->maxnz;
2333   PetscFunctionReturn(0);
2334 }
2335 EXTERN_C_END
2336 
2337 /*MC
2338    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2339    block sparse compressed row format.
2340 
2341    Options Database Keys:
2342 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2343 
2344   Level: beginner
2345 
2346 .seealso: MatCreateSeqBAIJ()
2347 M*/
2348 
2349 EXTERN_C_BEGIN
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatCreate_SeqBAIJ"
2352 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2353 {
2354   PetscErrorCode ierr;
2355   PetscMPIInt    size;
2356   Mat_SeqBAIJ    *b;
2357 
2358   PetscFunctionBegin;
2359   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2360   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2361 
2362   B->m = B->M = PetscMax(B->m,B->M);
2363   B->n = B->N = PetscMax(B->n,B->N);
2364   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2365   B->data = (void*)b;
2366   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2367   B->factor           = 0;
2368   B->mapping          = 0;
2369   b->row              = 0;
2370   b->col              = 0;
2371   b->icol             = 0;
2372   b->reallocs         = 0;
2373   b->saved_values     = 0;
2374 #if defined(PETSC_USE_MAT_SINGLE)
2375   b->setvalueslen     = 0;
2376   b->setvaluescopy    = PETSC_NULL;
2377 #endif
2378 
2379   ierr = PetscMapCreateMPI(B->comm,B->m,B->m,&B->rmap);CHKERRQ(ierr);
2380   ierr = PetscMapCreateMPI(B->comm,B->n,B->n,&B->cmap);CHKERRQ(ierr);
2381 
2382   b->sorted           = PETSC_FALSE;
2383   b->roworiented      = PETSC_TRUE;
2384   b->nonew            = 0;
2385   b->diag             = 0;
2386   b->solve_work       = 0;
2387   b->mult_work        = 0;
2388   B->spptr            = 0;
2389   B->info.nz_unneeded = (PetscReal)b->maxnz;
2390   b->keepzeroedrows   = PETSC_FALSE;
2391   b->xtoy              = 0;
2392   b->XtoY              = 0;
2393   b->compressedrow.use     = PETSC_FALSE;
2394   b->compressedrow.nrows   = 0;
2395   b->compressedrow.i       = PETSC_NULL;
2396   b->compressedrow.rindex  = PETSC_NULL;
2397   b->compressedrow.checked = PETSC_FALSE;
2398   B->same_nonzero          = PETSC_FALSE;
2399 
2400   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2401                                      "MatInvertBlockDiagonal_SeqBAIJ",
2402                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2403   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2404                                      "MatStoreValues_SeqBAIJ",
2405                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2406   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2407                                      "MatRetrieveValues_SeqBAIJ",
2408                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2409   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2410                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2411                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2412   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2413                                      "MatConvert_SeqBAIJ_SeqAIJ",
2414                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2415   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2416                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2417                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2418   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2419                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2420                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2421   PetscFunctionReturn(0);
2422 }
2423 EXTERN_C_END
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2427 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2428 {
2429   Mat            C;
2430   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2431   PetscErrorCode ierr;
2432   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2433 
2434   PetscFunctionBegin;
2435   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2436 
2437   *B = 0;
2438   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2439   ierr = MatSetSizes(C,A->m,A->n,A->m,A->n);CHKERRQ(ierr);
2440   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2441   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2442   c    = (Mat_SeqBAIJ*)C->data;
2443 
2444   C->M   = A->M;
2445   C->N   = A->N;
2446   C->bs  = A->bs;
2447   c->bs2 = a->bs2;
2448   c->mbs = a->mbs;
2449   c->nbs = a->nbs;
2450 
2451   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2452   for (i=0; i<mbs; i++) {
2453     c->imax[i] = a->imax[i];
2454     c->ilen[i] = a->ilen[i];
2455   }
2456 
2457   /* allocate the matrix space */
2458   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2459   c->singlemalloc = PETSC_TRUE;
2460   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2461   if (mbs > 0) {
2462     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2463     if (cpvalues == MAT_COPY_VALUES) {
2464       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2465     } else {
2466       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2467     }
2468   }
2469   c->sorted      = a->sorted;
2470   c->roworiented = a->roworiented;
2471   c->nonew       = a->nonew;
2472 
2473   if (a->diag) {
2474     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2475     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2476     for (i=0; i<mbs; i++) {
2477       c->diag[i] = a->diag[i];
2478     }
2479   } else c->diag        = 0;
2480   c->nz                 = a->nz;
2481   c->maxnz              = a->maxnz;
2482   c->solve_work         = 0;
2483   c->mult_work          = 0;
2484   C->preallocated       = PETSC_TRUE;
2485   C->assembled          = PETSC_TRUE;
2486 
2487   c->compressedrow.use     = a->compressedrow.use;
2488   c->compressedrow.nrows   = a->compressedrow.nrows;
2489   c->compressedrow.checked = a->compressedrow.checked;
2490   if ( a->compressedrow.checked && a->compressedrow.use){
2491     i = a->compressedrow.nrows;
2492     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2493     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2494     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2495     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2496   } else {
2497     c->compressedrow.use    = PETSC_FALSE;
2498     c->compressedrow.i      = PETSC_NULL;
2499     c->compressedrow.rindex = PETSC_NULL;
2500   }
2501   C->same_nonzero = A->same_nonzero;
2502   *B = C;
2503   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2504   PetscFunctionReturn(0);
2505 }
2506 
2507 #undef __FUNCT__
2508 #define __FUNCT__ "MatLoad_SeqBAIJ"
2509 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2510 {
2511   Mat_SeqBAIJ    *a;
2512   Mat            B;
2513   PetscErrorCode ierr;
2514   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2515   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2516   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2517   PetscInt       *masked,nmask,tmp,bs2,ishift;
2518   PetscMPIInt    size;
2519   int            fd;
2520   PetscScalar    *aa;
2521   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2522 
2523   PetscFunctionBegin;
2524   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2525   bs2  = bs*bs;
2526 
2527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2528   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2529   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2530   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2531   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2532   M = header[1]; N = header[2]; nz = header[3];
2533 
2534   if (header[3] < 0) {
2535     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2536   }
2537 
2538   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2539 
2540   /*
2541      This code adds extra rows to make sure the number of rows is
2542     divisible by the blocksize
2543   */
2544   mbs        = M/bs;
2545   extra_rows = bs - M + bs*(mbs);
2546   if (extra_rows == bs) extra_rows = 0;
2547   else                  mbs++;
2548   if (extra_rows) {
2549     ierr = PetscVerboseInfo((0,"MatLoad_SeqBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2550   }
2551 
2552   /* read in row lengths */
2553   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2554   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2555   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2556 
2557   /* read in column indices */
2558   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2559   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2560   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2561 
2562   /* loop over row lengths determining block row lengths */
2563   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2564   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2565   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2566   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2567   masked   = mask + mbs;
2568   rowcount = 0; nzcount = 0;
2569   for (i=0; i<mbs; i++) {
2570     nmask = 0;
2571     for (j=0; j<bs; j++) {
2572       kmax = rowlengths[rowcount];
2573       for (k=0; k<kmax; k++) {
2574         tmp = jj[nzcount++]/bs;
2575         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2576       }
2577       rowcount++;
2578     }
2579     browlengths[i] += nmask;
2580     /* zero out the mask elements we set */
2581     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2582   }
2583 
2584   /* create our matrix */
2585   ierr = MatCreate(comm,&B);
2586   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2587   ierr = MatSetType(B,type);CHKERRQ(ierr);
2588   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2589   a = (Mat_SeqBAIJ*)B->data;
2590 
2591   /* set matrix "i" values */
2592   a->i[0] = 0;
2593   for (i=1; i<= mbs; i++) {
2594     a->i[i]      = a->i[i-1] + browlengths[i-1];
2595     a->ilen[i-1] = browlengths[i-1];
2596   }
2597   a->nz         = 0;
2598   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2599 
2600   /* read in nonzero values */
2601   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2602   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2603   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2604 
2605   /* set "a" and "j" values into matrix */
2606   nzcount = 0; jcount = 0;
2607   for (i=0; i<mbs; i++) {
2608     nzcountb = nzcount;
2609     nmask    = 0;
2610     for (j=0; j<bs; j++) {
2611       kmax = rowlengths[i*bs+j];
2612       for (k=0; k<kmax; k++) {
2613         tmp = jj[nzcount++]/bs;
2614 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2615       }
2616     }
2617     /* sort the masked values */
2618     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2619 
2620     /* set "j" values into matrix */
2621     maskcount = 1;
2622     for (j=0; j<nmask; j++) {
2623       a->j[jcount++]  = masked[j];
2624       mask[masked[j]] = maskcount++;
2625     }
2626     /* set "a" values into matrix */
2627     ishift = bs2*a->i[i];
2628     for (j=0; j<bs; j++) {
2629       kmax = rowlengths[i*bs+j];
2630       for (k=0; k<kmax; k++) {
2631         tmp       = jj[nzcountb]/bs ;
2632         block     = mask[tmp] - 1;
2633         point     = jj[nzcountb] - bs*tmp;
2634         idx       = ishift + bs2*block + j + bs*point;
2635         a->a[idx] = (MatScalar)aa[nzcountb++];
2636       }
2637     }
2638     /* zero out the mask elements we set */
2639     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2640   }
2641   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2642 
2643   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2644   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2645   ierr = PetscFree(aa);CHKERRQ(ierr);
2646   ierr = PetscFree(jj);CHKERRQ(ierr);
2647   ierr = PetscFree(mask);CHKERRQ(ierr);
2648 
2649   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = MatView_Private(B);CHKERRQ(ierr);
2652 
2653   *A = B;
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "MatCreateSeqBAIJ"
2659 /*@C
2660    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2661    compressed row) format.  For good matrix assembly performance the
2662    user should preallocate the matrix storage by setting the parameter nz
2663    (or the array nnz).  By setting these parameters accurately, performance
2664    during matrix assembly can be increased by more than a factor of 50.
2665 
2666    Collective on MPI_Comm
2667 
2668    Input Parameters:
2669 +  comm - MPI communicator, set to PETSC_COMM_SELF
2670 .  bs - size of block
2671 .  m - number of rows
2672 .  n - number of columns
2673 .  nz - number of nonzero blocks  per block row (same for all rows)
2674 -  nnz - array containing the number of nonzero blocks in the various block rows
2675          (possibly different for each block row) or PETSC_NULL
2676 
2677    Output Parameter:
2678 .  A - the matrix
2679 
2680    Options Database Keys:
2681 .   -mat_no_unroll - uses code that does not unroll the loops in the
2682                      block calculations (much slower)
2683 .    -mat_block_size - size of the blocks to use
2684 
2685    Level: intermediate
2686 
2687    Notes:
2688    The number of rows and columns must be divisible by blocksize.
2689 
2690    If the nnz parameter is given then the nz parameter is ignored
2691 
2692    A nonzero block is any block that as 1 or more nonzeros in it
2693 
2694    The block AIJ format is fully compatible with standard Fortran 77
2695    storage.  That is, the stored row and column indices can begin at
2696    either one (as in Fortran) or zero.  See the users' manual for details.
2697 
2698    Specify the preallocated storage with either nz or nnz (not both).
2699    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2700    allocation.  For additional details, see the users manual chapter on
2701    matrices.
2702 
2703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2704 @*/
2705 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2706 {
2707   PetscErrorCode ierr;
2708 
2709   PetscFunctionBegin;
2710   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2711   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2712   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2713   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2719 /*@C
2720    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2721    per row in the matrix. For good matrix assembly performance the
2722    user should preallocate the matrix storage by setting the parameter nz
2723    (or the array nnz).  By setting these parameters accurately, performance
2724    during matrix assembly can be increased by more than a factor of 50.
2725 
2726    Collective on MPI_Comm
2727 
2728    Input Parameters:
2729 +  A - the matrix
2730 .  bs - size of block
2731 .  nz - number of block nonzeros per block row (same for all rows)
2732 -  nnz - array containing the number of block nonzeros in the various block rows
2733          (possibly different for each block row) or PETSC_NULL
2734 
2735    Options Database Keys:
2736 .   -mat_no_unroll - uses code that does not unroll the loops in the
2737                      block calculations (much slower)
2738 .    -mat_block_size - size of the blocks to use
2739 
2740    Level: intermediate
2741 
2742    Notes:
2743    If the nnz parameter is given then the nz parameter is ignored
2744 
2745    The block AIJ format is fully compatible with standard Fortran 77
2746    storage.  That is, the stored row and column indices can begin at
2747    either one (as in Fortran) or zero.  See the users' manual for details.
2748 
2749    Specify the preallocated storage with either nz or nnz (not both).
2750    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2751    allocation.  For additional details, see the users manual chapter on
2752    matrices.
2753 
2754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2755 @*/
2756 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2757 {
2758   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2759 
2760   PetscFunctionBegin;
2761   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2762   if (f) {
2763     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2764   }
2765   PetscFunctionReturn(0);
2766 }
2767 
2768