xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 58cd72c3d0f0c5a63e84afc03ac54bb1bb84cd8d)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt       n = a->mbs,i;
785 
786   PetscFunctionBegin;
787   *nn = n;
788   if (!ia) PetscFunctionReturn(0);
789   if (symmetric) {
790     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
791   } else if (oshift == 1) {
792     /* temporarily add 1 to i and j indices */
793     PetscInt nz = a->i[n];
794     for (i=0; i<nz; i++) a->j[i]++;
795     for (i=0; i<n+1; i++) a->i[i]++;
796     *ia = a->i; *ja = a->j;
797   } else {
798     *ia = a->i; *ja = a->j;
799   }
800 
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807 {
808   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
809   PetscErrorCode ierr;
810   PetscInt       i,n = a->mbs;
811 
812   PetscFunctionBegin;
813   if (!ia) PetscFunctionReturn(0);
814   if (symmetric) {
815     ierr = PetscFree(*ia);CHKERRQ(ierr);
816     ierr = PetscFree(*ja);CHKERRQ(ierr);
817   } else if (oshift == 1) {
818     PetscInt nz = a->i[n]-1;
819     for (i=0; i<nz; i++) a->j[i]--;
820     for (i=0; i<n+1; i++) a->i[i]--;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_SeqBAIJ"
827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835 #endif
836   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(a->diag);CHKERRQ(ierr);
844   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
845   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
846   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
847   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
850 #if defined(PETSC_USE_MAT_SINGLE)
851   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
852 #endif
853   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetOption_SeqBAIJ"
871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872 {
873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   switch (op) {
878   case MAT_ROW_ORIENTED:
879     a->roworiented    = PETSC_TRUE;
880     break;
881   case MAT_COLUMN_ORIENTED:
882     a->roworiented    = PETSC_FALSE;
883     break;
884   case MAT_COLUMNS_SORTED:
885     a->sorted         = PETSC_TRUE;
886     break;
887   case MAT_COLUMNS_UNSORTED:
888     a->sorted         = PETSC_FALSE;
889     break;
890   case MAT_KEEP_ZEROED_ROWS:
891     a->keepzeroedrows = PETSC_TRUE;
892     break;
893   case MAT_NO_NEW_NONZERO_LOCATIONS:
894     a->nonew          = 1;
895     break;
896   case MAT_NEW_NONZERO_LOCATION_ERR:
897     a->nonew          = -1;
898     break;
899   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900     a->nonew          = -2;
901     break;
902   case MAT_YES_NEW_NONZERO_LOCATIONS:
903     a->nonew          = 0;
904     break;
905   case MAT_ROWS_SORTED:
906   case MAT_ROWS_UNSORTED:
907   case MAT_YES_NEW_DIAGONALS:
908   case MAT_IGNORE_OFF_PROC_ENTRIES:
909   case MAT_USE_HASH_TABLE:
910     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
911     break;
912   case MAT_NO_NEW_DIAGONALS:
913     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914   case MAT_SYMMETRIC:
915   case MAT_STRUCTURALLY_SYMMETRIC:
916   case MAT_NOT_SYMMETRIC:
917   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918   case MAT_HERMITIAN:
919   case MAT_NOT_HERMITIAN:
920   case MAT_SYMMETRY_ETERNAL:
921   case MAT_NOT_SYMMETRY_ETERNAL:
922     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
923     break;
924   default:
925     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
926   }
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatGetRow_SeqBAIJ"
932 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
933 {
934   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
935   PetscErrorCode ierr;
936   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
937   MatScalar      *aa,*aa_i;
938   PetscScalar    *v_i;
939 
940   PetscFunctionBegin;
941   bs  = A->rmap.bs;
942   ai  = a->i;
943   aj  = a->j;
944   aa  = a->a;
945   bs2 = a->bs2;
946 
947   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
948 
949   bn  = row/bs;   /* Block number */
950   bp  = row % bs; /* Block Position */
951   M   = ai[bn+1] - ai[bn];
952   *nz = bs*M;
953 
954   if (v) {
955     *v = 0;
956     if (*nz) {
957       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
958       for (i=0; i<M; i++) { /* for each block in the block row */
959         v_i  = *v + i*bs;
960         aa_i = aa + bs2*(ai[bn] + i);
961         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
962       }
963     }
964   }
965 
966   if (idx) {
967     *idx = 0;
968     if (*nz) {
969       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
970       for (i=0; i<M; i++) { /* for each block in the block row */
971         idx_i = *idx + i*bs;
972         itmp  = bs*aj[ai[bn] + i];
973         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
974       }
975     }
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
982 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
983 {
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
988   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatTranspose_SeqBAIJ"
994 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
995 {
996   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
997   Mat            C;
998   PetscErrorCode ierr;
999   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1000   PetscInt       *rows,*cols,bs2=a->bs2;
1001   PetscScalar    *array;
1002 
1003   PetscFunctionBegin;
1004   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1005   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1006   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1007 
1008 #if defined(PETSC_USE_MAT_SINGLE)
1009   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1010   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1011 #else
1012   array = a->a;
1013 #endif
1014 
1015   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1016   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1017   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1018   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1019   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1020   ierr = PetscFree(col);CHKERRQ(ierr);
1021   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1022   cols = rows + bs;
1023   for (i=0; i<mbs; i++) {
1024     cols[0] = i*bs;
1025     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1026     len = ai[i+1] - ai[i];
1027     for (j=0; j<len; j++) {
1028       rows[0] = (*aj++)*bs;
1029       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1030       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1031       array += bs2;
1032     }
1033   }
1034   ierr = PetscFree(rows);CHKERRQ(ierr);
1035 #if defined(PETSC_USE_MAT_SINGLE)
1036   ierr = PetscFree(array);
1037 #endif
1038 
1039   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041 
1042   if (B) {
1043     *B = C;
1044   } else {
1045     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1046   }
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1052 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1057   int            fd;
1058   PetscScalar    *aa;
1059   FILE           *file;
1060 
1061   PetscFunctionBegin;
1062   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1063   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1064   col_lens[0] = MAT_FILE_COOKIE;
1065 
1066   col_lens[1] = A->rmap.N;
1067   col_lens[2] = A->cmap.n;
1068   col_lens[3] = a->nz*bs2;
1069 
1070   /* store lengths of each row and write (including header) to file */
1071   count = 0;
1072   for (i=0; i<a->mbs; i++) {
1073     for (j=0; j<bs; j++) {
1074       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1075     }
1076   }
1077   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1078   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1079 
1080   /* store column indices (zero start index) */
1081   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1082   count = 0;
1083   for (i=0; i<a->mbs; i++) {
1084     for (j=0; j<bs; j++) {
1085       for (k=a->i[i]; k<a->i[i+1]; k++) {
1086         for (l=0; l<bs; l++) {
1087           jj[count++] = bs*a->j[k] + l;
1088         }
1089       }
1090     }
1091   }
1092   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1093   ierr = PetscFree(jj);CHKERRQ(ierr);
1094 
1095   /* store nonzero values */
1096   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1097   count = 0;
1098   for (i=0; i<a->mbs; i++) {
1099     for (j=0; j<bs; j++) {
1100       for (k=a->i[i]; k<a->i[i+1]; k++) {
1101         for (l=0; l<bs; l++) {
1102           aa[count++] = a->a[bs2*k + l*bs + j];
1103         }
1104       }
1105     }
1106   }
1107   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1108   ierr = PetscFree(aa);CHKERRQ(ierr);
1109 
1110   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1111   if (file) {
1112     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1119 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1120 {
1121   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1122   PetscErrorCode    ierr;
1123   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1124   PetscViewerFormat format;
1125 
1126   PetscFunctionBegin;
1127   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1128   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1129     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1130   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1131     Mat aij;
1132     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1133     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1134     ierr = MatDestroy(aij);CHKERRQ(ierr);
1135   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1136      PetscFunctionReturn(0);
1137   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1138     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1139     for (i=0; i<a->mbs; i++) {
1140       for (j=0; j<bs; j++) {
1141         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1142         for (k=a->i[i]; k<a->i[i+1]; k++) {
1143           for (l=0; l<bs; l++) {
1144 #if defined(PETSC_USE_COMPLEX)
1145             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1147                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1150                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1153             }
1154 #else
1155             if (a->a[bs2*k + l*bs + j] != 0.0) {
1156               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1157             }
1158 #endif
1159           }
1160         }
1161         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1162       }
1163     }
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1165   } else {
1166     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1167     for (i=0; i<a->mbs; i++) {
1168       for (j=0; j<bs; j++) {
1169         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1170         for (k=a->i[i]; k<a->i[i+1]; k++) {
1171           for (l=0; l<bs; l++) {
1172 #if defined(PETSC_USE_COMPLEX)
1173             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1174               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1175                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1176             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1178                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             } else {
1180               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1181             }
1182 #else
1183             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1184 #endif
1185           }
1186         }
1187         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1188       }
1189     }
1190     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1191   }
1192   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1198 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1199 {
1200   Mat            A = (Mat) Aa;
1201   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1202   PetscErrorCode ierr;
1203   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1204   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1205   MatScalar      *aa;
1206   PetscViewer    viewer;
1207 
1208   PetscFunctionBegin;
1209 
1210   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1211   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1212 
1213   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1214 
1215   /* loop over matrix elements drawing boxes */
1216   color = PETSC_DRAW_BLUE;
1217   for (i=0,row=0; i<mbs; i++,row+=bs) {
1218     for (j=a->i[i]; j<a->i[i+1]; j++) {
1219       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1220       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1221       aa = a->a + j*bs2;
1222       for (k=0; k<bs; k++) {
1223         for (l=0; l<bs; l++) {
1224           if (PetscRealPart(*aa++) >=  0.) continue;
1225           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1226         }
1227       }
1228     }
1229   }
1230   color = PETSC_DRAW_CYAN;
1231   for (i=0,row=0; i<mbs; i++,row+=bs) {
1232     for (j=a->i[i]; j<a->i[i+1]; j++) {
1233       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1234       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1235       aa = a->a + j*bs2;
1236       for (k=0; k<bs; k++) {
1237         for (l=0; l<bs; l++) {
1238           if (PetscRealPart(*aa++) != 0.) continue;
1239           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1240         }
1241       }
1242     }
1243   }
1244 
1245   color = PETSC_DRAW_RED;
1246   for (i=0,row=0; i<mbs; i++,row+=bs) {
1247     for (j=a->i[i]; j<a->i[i+1]; j++) {
1248       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1249       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1250       aa = a->a + j*bs2;
1251       for (k=0; k<bs; k++) {
1252         for (l=0; l<bs; l++) {
1253           if (PetscRealPart(*aa++) <= 0.) continue;
1254           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1255         }
1256       }
1257     }
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1264 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1265 {
1266   PetscErrorCode ierr;
1267   PetscReal      xl,yl,xr,yr,w,h;
1268   PetscDraw      draw;
1269   PetscTruth     isnull;
1270 
1271   PetscFunctionBegin;
1272 
1273   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1274   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1275 
1276   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1277   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1278   xr += w;    yr += h;  xl = -w;     yl = -h;
1279   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1280   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1281   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatView_SeqBAIJ"
1287 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1288 {
1289   PetscErrorCode ierr;
1290   PetscTruth     iascii,isbinary,isdraw;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1295   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1296   if (iascii){
1297     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1298   } else if (isbinary) {
1299     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1300   } else if (isdraw) {
1301     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1302   } else {
1303     Mat B;
1304     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1305     ierr = MatView(B,viewer);CHKERRQ(ierr);
1306     ierr = MatDestroy(B);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1314 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1315 {
1316   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1317   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1318   PetscInt    *ai = a->i,*ailen = a->ilen;
1319   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1320   MatScalar   *ap,*aa = a->a,zero = 0.0;
1321 
1322   PetscFunctionBegin;
1323   for (k=0; k<m; k++) { /* loop over rows */
1324     row  = im[k]; brow = row/bs;
1325     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1326     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1327     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1328     nrow = ailen[brow];
1329     for (l=0; l<n; l++) { /* loop over columns */
1330       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1331       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1332       col  = in[l] ;
1333       bcol = col/bs;
1334       cidx = col%bs;
1335       ridx = row%bs;
1336       high = nrow;
1337       low  = 0; /* assume unsorted */
1338       while (high-low > 5) {
1339         t = (low+high)/2;
1340         if (rp[t] > bcol) high = t;
1341         else             low  = t;
1342       }
1343       for (i=low; i<high; i++) {
1344         if (rp[i] > bcol) break;
1345         if (rp[i] == bcol) {
1346           *v++ = ap[bs2*i+bs*cidx+ridx];
1347           goto finished;
1348         }
1349       }
1350       *v++ = zero;
1351       finished:;
1352     }
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #if defined(PETSC_USE_MAT_SINGLE)
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1360 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1361 {
1362   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1363   PetscErrorCode ierr;
1364   PetscInt       i,N = m*n*b->bs2;
1365   MatScalar      *vsingle;
1366 
1367   PetscFunctionBegin;
1368   if (N > b->setvalueslen) {
1369     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1370     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1371     b->setvalueslen  = N;
1372   }
1373   vsingle = b->setvaluescopy;
1374   for (i=0; i<N; i++) {
1375     vsingle[i] = v[i];
1376   }
1377   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 #endif
1381 
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1385 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1386 {
1387   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1388   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1389   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1390   PetscErrorCode  ierr;
1391   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1392   PetscTruth      roworiented=a->roworiented;
1393   const MatScalar *value = v;
1394   MatScalar       *ap,*aa = a->a,*bap;
1395 
1396   PetscFunctionBegin;
1397   if (roworiented) {
1398     stepval = (n-1)*bs;
1399   } else {
1400     stepval = (m-1)*bs;
1401   }
1402   for (k=0; k<m; k++) { /* loop over added rows */
1403     row  = im[k];
1404     if (row < 0) continue;
1405 #if defined(PETSC_USE_DEBUG)
1406     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1407 #endif
1408     rp   = aj + ai[row];
1409     ap   = aa + bs2*ai[row];
1410     rmax = imax[row];
1411     nrow = ailen[row];
1412     low  = 0;
1413     high = nrow;
1414     for (l=0; l<n; l++) { /* loop over added columns */
1415       if (in[l] < 0) continue;
1416 #if defined(PETSC_USE_DEBUG)
1417       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1418 #endif
1419       col = in[l];
1420       if (roworiented) {
1421         value = v + k*(stepval+bs)*bs + l*bs;
1422       } else {
1423         value = v + l*(stepval+bs)*bs + k*bs;
1424       }
1425       if (col <= lastcol) low = 0; else high = nrow;
1426       lastcol = col;
1427       while (high-low > 7) {
1428         t = (low+high)/2;
1429         if (rp[t] > col) high = t;
1430         else             low  = t;
1431       }
1432       for (i=low; i<high; i++) {
1433         if (rp[i] > col) break;
1434         if (rp[i] == col) {
1435           bap  = ap +  bs2*i;
1436           if (roworiented) {
1437             if (is == ADD_VALUES) {
1438               for (ii=0; ii<bs; ii++,value+=stepval) {
1439                 for (jj=ii; jj<bs2; jj+=bs) {
1440                   bap[jj] += *value++;
1441                 }
1442               }
1443             } else {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=ii; jj<bs2; jj+=bs) {
1446                   bap[jj] = *value++;
1447                 }
1448               }
1449             }
1450           } else {
1451             if (is == ADD_VALUES) {
1452               for (ii=0; ii<bs; ii++,value+=stepval) {
1453                 for (jj=0; jj<bs; jj++) {
1454                   *bap++ += *value++;
1455                 }
1456               }
1457             } else {
1458               for (ii=0; ii<bs; ii++,value+=stepval) {
1459                 for (jj=0; jj<bs; jj++) {
1460                   *bap++  = *value++;
1461                 }
1462               }
1463             }
1464           }
1465           goto noinsert2;
1466         }
1467       }
1468       if (nonew == 1) goto noinsert2;
1469       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1470       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1471       N = nrow++ - 1; high++;
1472       /* shift up all the later entries in this row */
1473       for (ii=N; ii>=i; ii--) {
1474         rp[ii+1] = rp[ii];
1475         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1476       }
1477       if (N >= i) {
1478         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479       }
1480       rp[i] = col;
1481       bap   = ap +  bs2*i;
1482       if (roworiented) {
1483         for (ii=0; ii<bs; ii++,value+=stepval) {
1484           for (jj=ii; jj<bs2; jj+=bs) {
1485             bap[jj] = *value++;
1486           }
1487         }
1488       } else {
1489         for (ii=0; ii<bs; ii++,value+=stepval) {
1490           for (jj=0; jj<bs; jj++) {
1491             *bap++  = *value++;
1492           }
1493         }
1494       }
1495       noinsert2:;
1496       low = i;
1497     }
1498     ailen[row] = nrow;
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506 {
1507   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1508   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1510   PetscErrorCode ierr;
1511   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1512   MatScalar      *aa = a->a,*ap;
1513   PetscReal      ratio=0.6;
1514 
1515   PetscFunctionBegin;
1516   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1517 
1518   if (m) rmax = ailen[0];
1519   for (i=1; i<mbs; i++) {
1520     /* move each row back by the amount of empty slots (fshift) before it*/
1521     fshift += imax[i-1] - ailen[i-1];
1522     rmax   = PetscMax(rmax,ailen[i]);
1523     if (fshift) {
1524       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525       N = ailen[i];
1526       for (j=0; j<N; j++) {
1527         ip[j-fshift] = ip[j];
1528         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1529       }
1530     }
1531     ai[i] = ai[i-1] + ailen[i-1];
1532   }
1533   if (mbs) {
1534     fshift += imax[mbs-1] - ailen[mbs-1];
1535     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536   }
1537   /* reset ilen and imax for each row */
1538   for (i=0; i<mbs; i++) {
1539     ailen[i] = imax[i] = ai[i+1] - ai[i];
1540   }
1541   a->nz = ai[mbs];
1542 
1543   /* diagonals may have moved, so kill the diagonal pointers */
1544   a->idiagvalid = PETSC_FALSE;
1545   if (fshift && a->diag) {
1546     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1547     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1548     a->diag = 0;
1549   }
1550   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1551   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1552   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1553   a->reallocs          = 0;
1554   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1555 
1556   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1557   if (a->compressedrow.use){
1558     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1559   }
1560 
1561   A->same_nonzero = PETSC_TRUE;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 /*
1566    This function returns an array of flags which indicate the locations of contiguous
1567    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1568    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569    Assume: sizes should be long enough to hold all the values.
1570 */
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574 {
1575   PetscInt   i,j,k,row;
1576   PetscTruth flg;
1577 
1578   PetscFunctionBegin;
1579   for (i=0,j=0; i<n; j++) {
1580     row = idx[i];
1581     if (row%bs!=0) { /* Not the begining of a block */
1582       sizes[j] = 1;
1583       i++;
1584     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1586       i++;
1587     } else { /* Begining of the block, so check if the complete block exists */
1588       flg = PETSC_TRUE;
1589       for (k=1; k<bs; k++) {
1590         if (row+k != idx[i+k]) { /* break in the block */
1591           flg = PETSC_FALSE;
1592           break;
1593         }
1594       }
1595       if (flg) { /* No break in the bs */
1596         sizes[j] = bs;
1597         i+= bs;
1598       } else {
1599         sizes[j] = 1;
1600         i++;
1601       }
1602     }
1603   }
1604   *bs_max = j;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1611 {
1612   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614   PetscInt       i,j,k,count,*rows;
1615   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1616   PetscScalar    zero = 0.0;
1617   MatScalar      *aa;
1618 
1619   PetscFunctionBegin;
1620   /* Make a copy of the IS and  sort it */
1621   /* allocate memory for rows,sizes */
1622   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1623   sizes = rows + is_n;
1624 
1625   /* copy IS values to rows, and sort them */
1626   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1627   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1628   if (baij->keepzeroedrows) {
1629     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1630     bs_max = is_n;
1631     A->same_nonzero = PETSC_TRUE;
1632   } else {
1633     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1634     A->same_nonzero = PETSC_FALSE;
1635   }
1636 
1637   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638     row   = rows[j];
1639     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1640     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642     if (sizes[i] == bs && !baij->keepzeroedrows) {
1643       if (diag != 0.0) {
1644         if (baij->ilen[row/bs] > 0) {
1645           baij->ilen[row/bs]       = 1;
1646           baij->j[baij->i[row/bs]] = row/bs;
1647           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1648         }
1649         /* Now insert all the diagonal values for this bs */
1650         for (k=0; k<bs; k++) {
1651           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1652         }
1653       } else { /* (diag == 0.0) */
1654         baij->ilen[row/bs] = 0;
1655       } /* end (diag == 0.0) */
1656     } else { /* (sizes[i] != bs) */
1657 #if defined (PETSC_USE_DEBUG)
1658       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659 #endif
1660       for (k=0; k<count; k++) {
1661         aa[0] =  zero;
1662         aa    += bs;
1663       }
1664       if (diag != 0.0) {
1665         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1666       }
1667     }
1668   }
1669 
1670   ierr = PetscFree(rows);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678 {
1679   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1680   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1681   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1683   PetscErrorCode ierr;
1684   PetscInt       ridx,cidx,bs2=a->bs2;
1685   PetscTruth     roworiented=a->roworiented;
1686   MatScalar      *ap,value,*aa=a->a,*bap;
1687 
1688   PetscFunctionBegin;
1689   for (k=0; k<m; k++) { /* loop over added rows */
1690     row  = im[k];
1691     brow = row/bs;
1692     if (row < 0) continue;
1693 #if defined(PETSC_USE_DEBUG)
1694     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1695 #endif
1696     rp   = aj + ai[brow];
1697     ap   = aa + bs2*ai[brow];
1698     rmax = imax[brow];
1699     nrow = ailen[brow];
1700     low  = 0;
1701     high = nrow;
1702     for (l=0; l<n; l++) { /* loop over added columns */
1703       if (in[l] < 0) continue;
1704 #if defined(PETSC_USE_DEBUG)
1705       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1706 #endif
1707       col = in[l]; bcol = col/bs;
1708       ridx = row % bs; cidx = col % bs;
1709       if (roworiented) {
1710         value = v[l + k*n];
1711       } else {
1712         value = v[k + l*m];
1713       }
1714       if (col <= lastcol) low = 0; else high = nrow;
1715       lastcol = col;
1716       while (high-low > 7) {
1717         t = (low+high)/2;
1718         if (rp[t] > bcol) high = t;
1719         else              low  = t;
1720       }
1721       for (i=low; i<high; i++) {
1722         if (rp[i] > bcol) break;
1723         if (rp[i] == bcol) {
1724           bap  = ap +  bs2*i + bs*cidx + ridx;
1725           if (is == ADD_VALUES) *bap += value;
1726           else                  *bap  = value;
1727           goto noinsert1;
1728         }
1729       }
1730       if (nonew == 1) goto noinsert1;
1731       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1732       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1733       N = nrow++ - 1; high++;
1734       /* shift up all the later entries in this row */
1735       for (ii=N; ii>=i; ii--) {
1736         rp[ii+1] = rp[ii];
1737         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1738       }
1739       if (N>=i) {
1740         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1741       }
1742       rp[i]                      = bcol;
1743       ap[bs2*i + bs*cidx + ridx] = value;
1744       a->nz++;
1745       noinsert1:;
1746       low = i;
1747     }
1748     ailen[brow] = nrow;
1749   }
1750   A->same_nonzero = PETSC_FALSE;
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1757 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1758 {
1759   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1760   Mat            outA;
1761   PetscErrorCode ierr;
1762   PetscTruth     row_identity,col_identity;
1763 
1764   PetscFunctionBegin;
1765   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1766   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1767   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1768   if (!row_identity || !col_identity) {
1769     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1770   }
1771 
1772   outA          = inA;
1773   inA->factor   = FACTOR_LU;
1774 
1775   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1776 
1777   a->row        = row;
1778   a->col        = col;
1779   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1780   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1781 
1782   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1783   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1784   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1785 
1786   /*
1787       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1788       for ILU(0) factorization with natural ordering
1789   */
1790   if (inA->rmap.bs < 8) {
1791     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1792   } else {
1793     if (!a->solve_work) {
1794       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1795       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1796     }
1797   }
1798 
1799   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1800 
1801   PetscFunctionReturn(0);
1802 }
1803 
1804 EXTERN_C_BEGIN
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1807 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1808 {
1809   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1810   PetscInt    i,nz,nbs;
1811 
1812   PetscFunctionBegin;
1813   nz  = baij->maxnz/baij->bs2;
1814   nbs = baij->nbs;
1815   for (i=0; i<nz; i++) {
1816     baij->j[i] = indices[i];
1817   }
1818   baij->nz = nz;
1819   for (i=0; i<nbs; i++) {
1820     baij->ilen[i] = baij->imax[i];
1821   }
1822 
1823   PetscFunctionReturn(0);
1824 }
1825 EXTERN_C_END
1826 
1827 #undef __FUNCT__
1828 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1829 /*@
1830     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1831        in the matrix.
1832 
1833   Input Parameters:
1834 +  mat - the SeqBAIJ matrix
1835 -  indices - the column indices
1836 
1837   Level: advanced
1838 
1839   Notes:
1840     This can be called if you have precomputed the nonzero structure of the
1841   matrix and want to provide it to the matrix object to improve the performance
1842   of the MatSetValues() operation.
1843 
1844     You MUST have set the correct numbers of nonzeros per row in the call to
1845   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1846 
1847     MUST be called before any calls to MatSetValues();
1848 
1849 @*/
1850 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1851 {
1852   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1853 
1854   PetscFunctionBegin;
1855   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1856   PetscValidPointer(indices,2);
1857   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1858   if (f) {
1859     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1860   } else {
1861     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1862   }
1863   PetscFunctionReturn(0);
1864 }
1865 
1866 #undef __FUNCT__
1867 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1868 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1869 {
1870   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1871   PetscErrorCode ierr;
1872   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1873   PetscReal      atmp;
1874   PetscScalar    *x,zero = 0.0;
1875   MatScalar      *aa;
1876   PetscInt       ncols,brow,krow,kcol;
1877 
1878   PetscFunctionBegin;
1879   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1880   bs   = A->rmap.bs;
1881   aa   = a->a;
1882   ai   = a->i;
1883   aj   = a->j;
1884   mbs = a->mbs;
1885 
1886   ierr = VecSet(v,zero);CHKERRQ(ierr);
1887   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1888   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1889   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1890   for (i=0; i<mbs; i++) {
1891     ncols = ai[1] - ai[0]; ai++;
1892     brow  = bs*i;
1893     for (j=0; j<ncols; j++){
1894       /* bcol = bs*(*aj); */
1895       for (kcol=0; kcol<bs; kcol++){
1896         for (krow=0; krow<bs; krow++){
1897           atmp = PetscAbsScalar(*aa); aa++;
1898           row = brow + krow;    /* row index */
1899           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1900           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1901         }
1902       }
1903       aj++;
1904     }
1905   }
1906   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1907   PetscFunctionReturn(0);
1908 }
1909 
1910 #undef __FUNCT__
1911 #define __FUNCT__ "MatCopy_SeqBAIJ"
1912 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1913 {
1914   PetscErrorCode ierr;
1915 
1916   PetscFunctionBegin;
1917   /* If the two matrices have the same copy implementation, use fast copy. */
1918   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1919     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1920     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1921 
1922     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1923       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1924     }
1925     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1926   } else {
1927     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1928   }
1929   PetscFunctionReturn(0);
1930 }
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1934 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1935 {
1936   PetscErrorCode ierr;
1937 
1938   PetscFunctionBegin;
1939   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1940   PetscFunctionReturn(0);
1941 }
1942 
1943 #undef __FUNCT__
1944 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1945 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1946 {
1947   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1948   PetscFunctionBegin;
1949   *array = a->a;
1950   PetscFunctionReturn(0);
1951 }
1952 
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1955 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1956 {
1957   PetscFunctionBegin;
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #include "petscblaslapack.h"
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1964 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1965 {
1966   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1967   PetscErrorCode ierr;
1968   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1969   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1970 
1971   PetscFunctionBegin;
1972   if (str == SAME_NONZERO_PATTERN) {
1973     PetscScalar alpha = a;
1974     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1975   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1976     if (y->xtoy && y->XtoY != X) {
1977       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1978       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1979     }
1980     if (!y->xtoy) { /* get xtoy */
1981       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1982       y->XtoY = X;
1983     }
1984     bs2 = bs*bs;
1985     for (i=0; i<x->nz; i++) {
1986       j = 0;
1987       while (j < bs2){
1988         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1989         j++;
1990       }
1991     }
1992     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1993   } else {
1994     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1995   }
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 #undef __FUNCT__
2000 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2001 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2002 {
2003   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2004   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2005   PetscScalar    *aa = a->a;
2006 
2007   PetscFunctionBegin;
2008   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2009   PetscFunctionReturn(0);
2010 }
2011 
2012 #undef __FUNCT__
2013 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2014 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2015 {
2016   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2017   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2018   PetscScalar    *aa = a->a;
2019 
2020   PetscFunctionBegin;
2021   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 
2026 /* -------------------------------------------------------------------*/
2027 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2028        MatGetRow_SeqBAIJ,
2029        MatRestoreRow_SeqBAIJ,
2030        MatMult_SeqBAIJ_N,
2031 /* 4*/ MatMultAdd_SeqBAIJ_N,
2032        MatMultTranspose_SeqBAIJ,
2033        MatMultTransposeAdd_SeqBAIJ,
2034        MatSolve_SeqBAIJ_N,
2035        0,
2036        0,
2037 /*10*/ 0,
2038        MatLUFactor_SeqBAIJ,
2039        0,
2040        0,
2041        MatTranspose_SeqBAIJ,
2042 /*15*/ MatGetInfo_SeqBAIJ,
2043        MatEqual_SeqBAIJ,
2044        MatGetDiagonal_SeqBAIJ,
2045        MatDiagonalScale_SeqBAIJ,
2046        MatNorm_SeqBAIJ,
2047 /*20*/ 0,
2048        MatAssemblyEnd_SeqBAIJ,
2049        0,
2050        MatSetOption_SeqBAIJ,
2051        MatZeroEntries_SeqBAIJ,
2052 /*25*/ MatZeroRows_SeqBAIJ,
2053        MatLUFactorSymbolic_SeqBAIJ,
2054        MatLUFactorNumeric_SeqBAIJ_N,
2055        MatCholeskyFactorSymbolic_SeqBAIJ,
2056        MatCholeskyFactorNumeric_SeqBAIJ_N,
2057 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2058        MatILUFactorSymbolic_SeqBAIJ,
2059        MatICCFactorSymbolic_SeqBAIJ,
2060        MatGetArray_SeqBAIJ,
2061        MatRestoreArray_SeqBAIJ,
2062 /*35*/ MatDuplicate_SeqBAIJ,
2063        0,
2064        0,
2065        MatILUFactor_SeqBAIJ,
2066        0,
2067 /*40*/ MatAXPY_SeqBAIJ,
2068        MatGetSubMatrices_SeqBAIJ,
2069        MatIncreaseOverlap_SeqBAIJ,
2070        MatGetValues_SeqBAIJ,
2071        MatCopy_SeqBAIJ,
2072 /*45*/ 0,
2073        MatScale_SeqBAIJ,
2074        0,
2075        0,
2076        0,
2077 /*50*/ 0,
2078        MatGetRowIJ_SeqBAIJ,
2079        MatRestoreRowIJ_SeqBAIJ,
2080        0,
2081        0,
2082 /*55*/ 0,
2083        0,
2084        0,
2085        0,
2086        MatSetValuesBlocked_SeqBAIJ,
2087 /*60*/ MatGetSubMatrix_SeqBAIJ,
2088        MatDestroy_SeqBAIJ,
2089        MatView_SeqBAIJ,
2090        0,
2091        0,
2092 /*65*/ 0,
2093        0,
2094        0,
2095        0,
2096        0,
2097 /*70*/ MatGetRowMax_SeqBAIJ,
2098        MatConvert_Basic,
2099        0,
2100        0,
2101        0,
2102 /*75*/ 0,
2103        0,
2104        0,
2105        0,
2106        0,
2107 /*80*/ 0,
2108        0,
2109        0,
2110        0,
2111        MatLoad_SeqBAIJ,
2112 /*85*/ 0,
2113        0,
2114        0,
2115        0,
2116        0,
2117 /*90*/ 0,
2118        0,
2119        0,
2120        0,
2121        0,
2122 /*95*/ 0,
2123        0,
2124        0,
2125        0,
2126        0,
2127 /*100*/0,
2128        0,
2129        0,
2130        0,
2131        0,
2132 /*105*/0,
2133        MatRealPart_SeqBAIJ,
2134        MatImaginaryPart_SeqBAIJ
2135 };
2136 
2137 EXTERN_C_BEGIN
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2140 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2141 {
2142   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2143   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2144   PetscErrorCode ierr;
2145 
2146   PetscFunctionBegin;
2147   if (aij->nonew != 1) {
2148     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2149   }
2150 
2151   /* allocate space for values if not already there */
2152   if (!aij->saved_values) {
2153     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2154   }
2155 
2156   /* copy values over */
2157   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2158   PetscFunctionReturn(0);
2159 }
2160 EXTERN_C_END
2161 
2162 EXTERN_C_BEGIN
2163 #undef __FUNCT__
2164 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2165 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2166 {
2167   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2168   PetscErrorCode ierr;
2169   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2170 
2171   PetscFunctionBegin;
2172   if (aij->nonew != 1) {
2173     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2174   }
2175   if (!aij->saved_values) {
2176     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2177   }
2178 
2179   /* copy values over */
2180   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2181   PetscFunctionReturn(0);
2182 }
2183 EXTERN_C_END
2184 
2185 EXTERN_C_BEGIN
2186 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2187 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2188 EXTERN_C_END
2189 
2190 EXTERN_C_BEGIN
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2193 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2194 {
2195   Mat_SeqBAIJ    *b;
2196   PetscErrorCode ierr;
2197   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2198   PetscTruth     flg,skipallocation = PETSC_FALSE;
2199 
2200   PetscFunctionBegin;
2201 
2202   if (nz == MAT_SKIP_ALLOCATION) {
2203     skipallocation = PETSC_TRUE;
2204     nz             = 0;
2205   }
2206 
2207   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2208     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2209   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2210 
2211   if (nnz && newbs != bs) {
2212     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2213   }
2214   bs   = newbs;
2215 
2216   B->rmap.bs = B->cmap.bs = bs;
2217   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2218   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2219 
2220   B->preallocated = PETSC_TRUE;
2221 
2222   mbs  = B->rmap.n/bs;
2223   nbs  = B->cmap.n/bs;
2224   bs2  = bs*bs;
2225 
2226   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2227     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2228   }
2229 
2230   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2231   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2232   if (nnz) {
2233     for (i=0; i<mbs; i++) {
2234       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2235       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2236     }
2237   }
2238 
2239   b       = (Mat_SeqBAIJ*)B->data;
2240   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2241     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2242   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2243 
2244   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2245   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2246   if (!flg) {
2247     switch (bs) {
2248     case 1:
2249       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2250       B->ops->mult            = MatMult_SeqBAIJ_1;
2251       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2252       break;
2253     case 2:
2254       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2255       B->ops->mult            = MatMult_SeqBAIJ_2;
2256       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2257       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2258       break;
2259     case 3:
2260       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2261       B->ops->mult            = MatMult_SeqBAIJ_3;
2262       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2263       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2264       break;
2265     case 4:
2266       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2267       B->ops->mult            = MatMult_SeqBAIJ_4;
2268       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2269       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2270       break;
2271     case 5:
2272       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2273       B->ops->mult            = MatMult_SeqBAIJ_5;
2274       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2275       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2276       break;
2277     case 6:
2278       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2279       B->ops->mult            = MatMult_SeqBAIJ_6;
2280       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2281       break;
2282     case 7:
2283       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2284       B->ops->mult            = MatMult_SeqBAIJ_7;
2285       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2286       break;
2287     default:
2288       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2289       B->ops->mult            = MatMult_SeqBAIJ_N;
2290       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2291       break;
2292     }
2293   }
2294   B->rmap.bs      = bs;
2295   b->mbs     = mbs;
2296   b->nbs     = nbs;
2297   if (!skipallocation) {
2298     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2299     /* b->ilen will count nonzeros in each block row so far. */
2300     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2301     if (!nnz) {
2302       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2303       else if (nz <= 0)        nz = 1;
2304       for (i=0; i<mbs; i++) b->imax[i] = nz;
2305       nz = nz*mbs;
2306     } else {
2307       nz = 0;
2308       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2309     }
2310 
2311     /* allocate the matrix space */
2312     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2313     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2314     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2315     b->singlemalloc = PETSC_TRUE;
2316 
2317     b->i[0] = 0;
2318     for (i=1; i<mbs+1; i++) {
2319       b->i[i] = b->i[i-1] + b->imax[i-1];
2320     }
2321     b->free_a     = PETSC_TRUE;
2322     b->free_ij    = PETSC_TRUE;
2323   } else {
2324     b->free_a     = PETSC_FALSE;
2325     b->free_ij    = PETSC_FALSE;
2326   }
2327 
2328   B->rmap.bs          = bs;
2329   b->bs2              = bs2;
2330   b->mbs              = mbs;
2331   b->nz               = 0;
2332   b->maxnz            = nz*bs2;
2333   B->info.nz_unneeded = (PetscReal)b->maxnz;
2334   PetscFunctionReturn(0);
2335 }
2336 EXTERN_C_END
2337 
2338 /*MC
2339    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2340    block sparse compressed row format.
2341 
2342    Options Database Keys:
2343 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2344 
2345   Level: beginner
2346 
2347 .seealso: MatCreateSeqBAIJ()
2348 M*/
2349 
2350 EXTERN_C_BEGIN
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatCreate_SeqBAIJ"
2353 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2354 {
2355   PetscErrorCode ierr;
2356   PetscMPIInt    size;
2357   Mat_SeqBAIJ    *b;
2358 
2359   PetscFunctionBegin;
2360   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2361   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2362 
2363   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2364   B->data = (void*)b;
2365   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2366   B->factor           = 0;
2367   B->mapping          = 0;
2368   b->row              = 0;
2369   b->col              = 0;
2370   b->icol             = 0;
2371   b->reallocs         = 0;
2372   b->saved_values     = 0;
2373 #if defined(PETSC_USE_MAT_SINGLE)
2374   b->setvalueslen     = 0;
2375   b->setvaluescopy    = PETSC_NULL;
2376 #endif
2377 
2378   b->sorted           = PETSC_FALSE;
2379   b->roworiented      = PETSC_TRUE;
2380   b->nonew            = 0;
2381   b->diag             = 0;
2382   b->solve_work       = 0;
2383   b->mult_work        = 0;
2384   B->spptr            = 0;
2385   B->info.nz_unneeded = (PetscReal)b->maxnz;
2386   b->keepzeroedrows   = PETSC_FALSE;
2387   b->xtoy              = 0;
2388   b->XtoY              = 0;
2389   b->compressedrow.use     = PETSC_FALSE;
2390   b->compressedrow.nrows   = 0;
2391   b->compressedrow.i       = PETSC_NULL;
2392   b->compressedrow.rindex  = PETSC_NULL;
2393   b->compressedrow.checked = PETSC_FALSE;
2394   B->same_nonzero          = PETSC_FALSE;
2395 
2396   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2397                                      "MatInvertBlockDiagonal_SeqBAIJ",
2398                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2399   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2400                                      "MatStoreValues_SeqBAIJ",
2401                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2402   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2403                                      "MatRetrieveValues_SeqBAIJ",
2404                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2405   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2406                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2407                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2408   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2409                                      "MatConvert_SeqBAIJ_SeqAIJ",
2410                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2411   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2412                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2413                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2414   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2415                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2416                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2417   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 EXTERN_C_END
2421 
2422 #undef __FUNCT__
2423 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2424 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2425 {
2426   Mat            C;
2427   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2428   PetscErrorCode ierr;
2429   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2430 
2431   PetscFunctionBegin;
2432   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2433 
2434   *B = 0;
2435   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2436   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2437   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2438   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2439   c    = (Mat_SeqBAIJ*)C->data;
2440 
2441   C->rmap.N   = A->rmap.N;
2442   C->cmap.N   = A->cmap.N;
2443   C->rmap.bs  = A->rmap.bs;
2444   c->bs2 = a->bs2;
2445   c->mbs = a->mbs;
2446   c->nbs = a->nbs;
2447 
2448   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2449   for (i=0; i<mbs; i++) {
2450     c->imax[i] = a->imax[i];
2451     c->ilen[i] = a->ilen[i];
2452   }
2453 
2454   /* allocate the matrix space */
2455   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2456   c->singlemalloc = PETSC_TRUE;
2457   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2458   if (mbs > 0) {
2459     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2460     if (cpvalues == MAT_COPY_VALUES) {
2461       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2462     } else {
2463       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2464     }
2465   }
2466   c->sorted      = a->sorted;
2467   c->roworiented = a->roworiented;
2468   c->nonew       = a->nonew;
2469 
2470   if (a->diag) {
2471     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2472     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2473     for (i=0; i<mbs; i++) {
2474       c->diag[i] = a->diag[i];
2475     }
2476   } else c->diag        = 0;
2477   c->nz                 = a->nz;
2478   c->maxnz              = a->maxnz;
2479   c->solve_work         = 0;
2480   c->mult_work          = 0;
2481   c->free_a             = PETSC_TRUE;
2482   c->free_ij            = PETSC_TRUE;
2483   C->preallocated       = PETSC_TRUE;
2484   C->assembled          = PETSC_TRUE;
2485 
2486   c->compressedrow.use     = a->compressedrow.use;
2487   c->compressedrow.nrows   = a->compressedrow.nrows;
2488   c->compressedrow.checked = a->compressedrow.checked;
2489   if ( a->compressedrow.checked && a->compressedrow.use){
2490     i = a->compressedrow.nrows;
2491     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2492     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2493     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2494     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2495   } else {
2496     c->compressedrow.use    = PETSC_FALSE;
2497     c->compressedrow.i      = PETSC_NULL;
2498     c->compressedrow.rindex = PETSC_NULL;
2499   }
2500   C->same_nonzero = A->same_nonzero;
2501   *B = C;
2502   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatLoad_SeqBAIJ"
2508 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2509 {
2510   Mat_SeqBAIJ    *a;
2511   Mat            B;
2512   PetscErrorCode ierr;
2513   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2514   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2515   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2516   PetscInt       *masked,nmask,tmp,bs2,ishift;
2517   PetscMPIInt    size;
2518   int            fd;
2519   PetscScalar    *aa;
2520   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2521 
2522   PetscFunctionBegin;
2523   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2524     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2525   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2526   bs2  = bs*bs;
2527 
2528   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2529   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2530   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2531   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2532   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2533   M = header[1]; N = header[2]; nz = header[3];
2534 
2535   if (header[3] < 0) {
2536     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2537   }
2538 
2539   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2540 
2541   /*
2542      This code adds extra rows to make sure the number of rows is
2543     divisible by the blocksize
2544   */
2545   mbs        = M/bs;
2546   extra_rows = bs - M + bs*(mbs);
2547   if (extra_rows == bs) extra_rows = 0;
2548   else                  mbs++;
2549   if (extra_rows) {
2550     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2551   }
2552 
2553   /* read in row lengths */
2554   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2555   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2556   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2557 
2558   /* read in column indices */
2559   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2560   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2561   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2562 
2563   /* loop over row lengths determining block row lengths */
2564   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2565   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2566   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2567   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2568   masked   = mask + mbs;
2569   rowcount = 0; nzcount = 0;
2570   for (i=0; i<mbs; i++) {
2571     nmask = 0;
2572     for (j=0; j<bs; j++) {
2573       kmax = rowlengths[rowcount];
2574       for (k=0; k<kmax; k++) {
2575         tmp = jj[nzcount++]/bs;
2576         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2577       }
2578       rowcount++;
2579     }
2580     browlengths[i] += nmask;
2581     /* zero out the mask elements we set */
2582     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2583   }
2584 
2585   /* create our matrix */
2586   ierr = MatCreate(comm,&B);
2587   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2588   ierr = MatSetType(B,type);CHKERRQ(ierr);
2589   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2590   a = (Mat_SeqBAIJ*)B->data;
2591 
2592   /* set matrix "i" values */
2593   a->i[0] = 0;
2594   for (i=1; i<= mbs; i++) {
2595     a->i[i]      = a->i[i-1] + browlengths[i-1];
2596     a->ilen[i-1] = browlengths[i-1];
2597   }
2598   a->nz         = 0;
2599   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2600 
2601   /* read in nonzero values */
2602   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2603   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2604   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2605 
2606   /* set "a" and "j" values into matrix */
2607   nzcount = 0; jcount = 0;
2608   for (i=0; i<mbs; i++) {
2609     nzcountb = nzcount;
2610     nmask    = 0;
2611     for (j=0; j<bs; j++) {
2612       kmax = rowlengths[i*bs+j];
2613       for (k=0; k<kmax; k++) {
2614         tmp = jj[nzcount++]/bs;
2615 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2616       }
2617     }
2618     /* sort the masked values */
2619     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2620 
2621     /* set "j" values into matrix */
2622     maskcount = 1;
2623     for (j=0; j<nmask; j++) {
2624       a->j[jcount++]  = masked[j];
2625       mask[masked[j]] = maskcount++;
2626     }
2627     /* set "a" values into matrix */
2628     ishift = bs2*a->i[i];
2629     for (j=0; j<bs; j++) {
2630       kmax = rowlengths[i*bs+j];
2631       for (k=0; k<kmax; k++) {
2632         tmp       = jj[nzcountb]/bs ;
2633         block     = mask[tmp] - 1;
2634         point     = jj[nzcountb] - bs*tmp;
2635         idx       = ishift + bs2*block + j + bs*point;
2636         a->a[idx] = (MatScalar)aa[nzcountb++];
2637       }
2638     }
2639     /* zero out the mask elements we set */
2640     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2641   }
2642   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2643 
2644   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2645   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2646   ierr = PetscFree(aa);CHKERRQ(ierr);
2647   ierr = PetscFree(jj);CHKERRQ(ierr);
2648   ierr = PetscFree(mask);CHKERRQ(ierr);
2649 
2650   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2652   ierr = MatView_Private(B);CHKERRQ(ierr);
2653 
2654   *A = B;
2655   PetscFunctionReturn(0);
2656 }
2657 
2658 #undef __FUNCT__
2659 #define __FUNCT__ "MatCreateSeqBAIJ"
2660 /*@C
2661    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2662    compressed row) format.  For good matrix assembly performance the
2663    user should preallocate the matrix storage by setting the parameter nz
2664    (or the array nnz).  By setting these parameters accurately, performance
2665    during matrix assembly can be increased by more than a factor of 50.
2666 
2667    Collective on MPI_Comm
2668 
2669    Input Parameters:
2670 +  comm - MPI communicator, set to PETSC_COMM_SELF
2671 .  bs - size of block
2672 .  m - number of rows
2673 .  n - number of columns
2674 .  nz - number of nonzero blocks  per block row (same for all rows)
2675 -  nnz - array containing the number of nonzero blocks in the various block rows
2676          (possibly different for each block row) or PETSC_NULL
2677 
2678    Output Parameter:
2679 .  A - the matrix
2680 
2681    Options Database Keys:
2682 .   -mat_no_unroll - uses code that does not unroll the loops in the
2683                      block calculations (much slower)
2684 .    -mat_block_size - size of the blocks to use
2685 
2686    Level: intermediate
2687 
2688    Notes:
2689    The number of rows and columns must be divisible by blocksize.
2690 
2691    If the nnz parameter is given then the nz parameter is ignored
2692 
2693    A nonzero block is any block that as 1 or more nonzeros in it
2694 
2695    The block AIJ format is fully compatible with standard Fortran 77
2696    storage.  That is, the stored row and column indices can begin at
2697    either one (as in Fortran) or zero.  See the users' manual for details.
2698 
2699    Specify the preallocated storage with either nz or nnz (not both).
2700    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2701    allocation.  For additional details, see the users manual chapter on
2702    matrices.
2703 
2704 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2705 @*/
2706 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2707 {
2708   PetscErrorCode ierr;
2709 
2710   PetscFunctionBegin;
2711   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2712   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2713   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2714   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2720 /*@C
2721    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2722    per row in the matrix. For good matrix assembly performance the
2723    user should preallocate the matrix storage by setting the parameter nz
2724    (or the array nnz).  By setting these parameters accurately, performance
2725    during matrix assembly can be increased by more than a factor of 50.
2726 
2727    Collective on MPI_Comm
2728 
2729    Input Parameters:
2730 +  A - the matrix
2731 .  bs - size of block
2732 .  nz - number of block nonzeros per block row (same for all rows)
2733 -  nnz - array containing the number of block nonzeros in the various block rows
2734          (possibly different for each block row) or PETSC_NULL
2735 
2736    Options Database Keys:
2737 .   -mat_no_unroll - uses code that does not unroll the loops in the
2738                      block calculations (much slower)
2739 .    -mat_block_size - size of the blocks to use
2740 
2741    Level: intermediate
2742 
2743    Notes:
2744    If the nnz parameter is given then the nz parameter is ignored
2745 
2746    The block AIJ format is fully compatible with standard Fortran 77
2747    storage.  That is, the stored row and column indices can begin at
2748    either one (as in Fortran) or zero.  See the users' manual for details.
2749 
2750    Specify the preallocated storage with either nz or nnz (not both).
2751    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2752    allocation.  For additional details, see the users manual chapter on
2753    matrices.
2754 
2755 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2756 @*/
2757 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2758 {
2759   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2760 
2761   PetscFunctionBegin;
2762   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2763   if (f) {
2764     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2765   }
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 #undef __FUNCT__
2770 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2771 /*@
2772      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2773               (upper triangular entries in CSR format) provided by the user.
2774 
2775      Collective on MPI_Comm
2776 
2777    Input Parameters:
2778 +  comm - must be an MPI communicator of size 1
2779 .  bs - size of block
2780 .  m - number of rows
2781 .  n - number of columns
2782 .  i - row indices
2783 .  j - column indices
2784 -  a - matrix values
2785 
2786    Output Parameter:
2787 .  mat - the matrix
2788 
2789    Level: intermediate
2790 
2791    Notes:
2792        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2793     once the matrix is destroyed
2794 
2795        You cannot set new nonzero locations into this matrix, that will generate an error.
2796 
2797        The i and j indices are 0 based
2798 
2799 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2800 
2801 @*/
2802 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2803 {
2804   PetscErrorCode ierr;
2805   PetscInt       ii;
2806   Mat_SeqBAIJ    *baij;
2807 
2808   PetscFunctionBegin;
2809   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2810   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2811 
2812   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2813   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2814   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2815   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2816   baij = (Mat_SeqBAIJ*)(*mat)->data;
2817   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2818 
2819   baij->i = i;
2820   baij->j = j;
2821   baij->a = a;
2822   baij->singlemalloc = PETSC_FALSE;
2823   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2824   baij->free_a       = PETSC_FALSE;
2825   baij->free_ij       = PETSC_FALSE;
2826 
2827   for (ii=0; ii<m; ii++) {
2828     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2829 #if defined(PETSC_USE_DEBUG)
2830     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2831 #endif
2832   }
2833 #if defined(PETSC_USE_DEBUG)
2834   for (ii=0; ii<baij->i[m]; ii++) {
2835     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2836     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2837   }
2838 #endif
2839 
2840   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2842   PetscFunctionReturn(0);
2843 }
2844