xref: /petsc/src/mat/impls/baij/seq/baij.c (revision db268bfa9c46c24c134d4e2e1d78f27d60e1123e)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt       n = a->mbs,i;
785 
786   PetscFunctionBegin;
787   *nn = n;
788   if (!ia) PetscFunctionReturn(0);
789   if (symmetric) {
790     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
791   } else if (oshift == 1) {
792     /* temporarily add 1 to i and j indices */
793     PetscInt nz = a->i[n];
794     for (i=0; i<nz; i++) a->j[i]++;
795     for (i=0; i<n+1; i++) a->i[i]++;
796     *ia = a->i; *ja = a->j;
797   } else {
798     *ia = a->i; *ja = a->j;
799   }
800 
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807 {
808   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
809   PetscErrorCode ierr;
810   PetscInt       i,n = a->mbs;
811 
812   PetscFunctionBegin;
813   if (!ia) PetscFunctionReturn(0);
814   if (symmetric) {
815     ierr = PetscFree(*ia);CHKERRQ(ierr);
816     ierr = PetscFree(*ja);CHKERRQ(ierr);
817   } else if (oshift == 1) {
818     PetscInt nz = a->i[n]-1;
819     for (i=0; i<nz; i++) a->j[i]--;
820     for (i=0; i<n+1; i++) a->i[i]--;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_SeqBAIJ"
827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835 #endif
836   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(a->diag);CHKERRQ(ierr);
844   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
845   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
846   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
847   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
850 #if defined(PETSC_USE_MAT_SINGLE)
851   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
852 #endif
853   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
865   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatSetOption_SeqBAIJ"
871 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
872 {
873   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
874   PetscErrorCode ierr;
875 
876   PetscFunctionBegin;
877   switch (op) {
878   case MAT_ROW_ORIENTED:
879     a->roworiented    = PETSC_TRUE;
880     break;
881   case MAT_COLUMN_ORIENTED:
882     a->roworiented    = PETSC_FALSE;
883     break;
884   case MAT_COLUMNS_SORTED:
885     a->sorted         = PETSC_TRUE;
886     break;
887   case MAT_COLUMNS_UNSORTED:
888     a->sorted         = PETSC_FALSE;
889     break;
890   case MAT_KEEP_ZEROED_ROWS:
891     a->keepzeroedrows = PETSC_TRUE;
892     break;
893   case MAT_NO_NEW_NONZERO_LOCATIONS:
894     a->nonew          = 1;
895     break;
896   case MAT_NEW_NONZERO_LOCATION_ERR:
897     a->nonew          = -1;
898     break;
899   case MAT_NEW_NONZERO_ALLOCATION_ERR:
900     a->nonew          = -2;
901     break;
902   case MAT_YES_NEW_NONZERO_LOCATIONS:
903     a->nonew          = 0;
904     break;
905   case MAT_ROWS_SORTED:
906   case MAT_ROWS_UNSORTED:
907   case MAT_YES_NEW_DIAGONALS:
908   case MAT_IGNORE_OFF_PROC_ENTRIES:
909   case MAT_USE_HASH_TABLE:
910     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
911     break;
912   case MAT_NO_NEW_DIAGONALS:
913     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
914   case MAT_SYMMETRIC:
915   case MAT_STRUCTURALLY_SYMMETRIC:
916   case MAT_NOT_SYMMETRIC:
917   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
918   case MAT_HERMITIAN:
919   case MAT_NOT_HERMITIAN:
920   case MAT_SYMMETRY_ETERNAL:
921   case MAT_NOT_SYMMETRY_ETERNAL:
922     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
923     break;
924   default:
925     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
926   }
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatGetRow_SeqBAIJ"
932 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
933 {
934   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
935   PetscErrorCode ierr;
936   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
937   MatScalar      *aa,*aa_i;
938   PetscScalar    *v_i;
939 
940   PetscFunctionBegin;
941   bs  = A->rmap.bs;
942   ai  = a->i;
943   aj  = a->j;
944   aa  = a->a;
945   bs2 = a->bs2;
946 
947   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
948 
949   bn  = row/bs;   /* Block number */
950   bp  = row % bs; /* Block Position */
951   M   = ai[bn+1] - ai[bn];
952   *nz = bs*M;
953 
954   if (v) {
955     *v = 0;
956     if (*nz) {
957       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
958       for (i=0; i<M; i++) { /* for each block in the block row */
959         v_i  = *v + i*bs;
960         aa_i = aa + bs2*(ai[bn] + i);
961         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
962       }
963     }
964   }
965 
966   if (idx) {
967     *idx = 0;
968     if (*nz) {
969       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
970       for (i=0; i<M; i++) { /* for each block in the block row */
971         idx_i = *idx + i*bs;
972         itmp  = bs*aj[ai[bn] + i];
973         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
974       }
975     }
976   }
977   PetscFunctionReturn(0);
978 }
979 
980 #undef __FUNCT__
981 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
982 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
983 {
984   PetscErrorCode ierr;
985 
986   PetscFunctionBegin;
987   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
988   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
989   PetscFunctionReturn(0);
990 }
991 
992 #undef __FUNCT__
993 #define __FUNCT__ "MatTranspose_SeqBAIJ"
994 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
995 {
996   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
997   Mat            C;
998   PetscErrorCode ierr;
999   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1000   PetscInt       *rows,*cols,bs2=a->bs2;
1001   PetscScalar    *array;
1002 
1003   PetscFunctionBegin;
1004   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1005   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1006   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1007 
1008 #if defined(PETSC_USE_MAT_SINGLE)
1009   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1010   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1011 #else
1012   array = a->a;
1013 #endif
1014 
1015   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1016   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1017   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1018   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1019   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1020   ierr = PetscFree(col);CHKERRQ(ierr);
1021   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1022   cols = rows + bs;
1023   for (i=0; i<mbs; i++) {
1024     cols[0] = i*bs;
1025     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1026     len = ai[i+1] - ai[i];
1027     for (j=0; j<len; j++) {
1028       rows[0] = (*aj++)*bs;
1029       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1030       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1031       array += bs2;
1032     }
1033   }
1034   ierr = PetscFree(rows);CHKERRQ(ierr);
1035 #if defined(PETSC_USE_MAT_SINGLE)
1036   ierr = PetscFree(array);
1037 #endif
1038 
1039   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1041 
1042   if (B) {
1043     *B = C;
1044   } else {
1045     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1046   }
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1052 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1053 {
1054   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1055   PetscErrorCode ierr;
1056   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1057   int            fd;
1058   PetscScalar    *aa;
1059   FILE           *file;
1060 
1061   PetscFunctionBegin;
1062   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1063   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1064   col_lens[0] = MAT_FILE_COOKIE;
1065 
1066   col_lens[1] = A->rmap.N;
1067   col_lens[2] = A->cmap.n;
1068   col_lens[3] = a->nz*bs2;
1069 
1070   /* store lengths of each row and write (including header) to file */
1071   count = 0;
1072   for (i=0; i<a->mbs; i++) {
1073     for (j=0; j<bs; j++) {
1074       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1075     }
1076   }
1077   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1078   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1079 
1080   /* store column indices (zero start index) */
1081   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1082   count = 0;
1083   for (i=0; i<a->mbs; i++) {
1084     for (j=0; j<bs; j++) {
1085       for (k=a->i[i]; k<a->i[i+1]; k++) {
1086         for (l=0; l<bs; l++) {
1087           jj[count++] = bs*a->j[k] + l;
1088         }
1089       }
1090     }
1091   }
1092   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1093   ierr = PetscFree(jj);CHKERRQ(ierr);
1094 
1095   /* store nonzero values */
1096   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1097   count = 0;
1098   for (i=0; i<a->mbs; i++) {
1099     for (j=0; j<bs; j++) {
1100       for (k=a->i[i]; k<a->i[i+1]; k++) {
1101         for (l=0; l<bs; l++) {
1102           aa[count++] = a->a[bs2*k + l*bs + j];
1103         }
1104       }
1105     }
1106   }
1107   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1108   ierr = PetscFree(aa);CHKERRQ(ierr);
1109 
1110   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1111   if (file) {
1112     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1113   }
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1119 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1120 {
1121   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1122   PetscErrorCode    ierr;
1123   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1124   PetscViewerFormat format;
1125 
1126   PetscFunctionBegin;
1127   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1128   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1129     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1130   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1131     Mat aij;
1132     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1133     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1134     ierr = MatDestroy(aij);CHKERRQ(ierr);
1135   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1136      PetscFunctionReturn(0);
1137   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1138     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1139     for (i=0; i<a->mbs; i++) {
1140       for (j=0; j<bs; j++) {
1141         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1142         for (k=a->i[i]; k<a->i[i+1]; k++) {
1143           for (l=0; l<bs; l++) {
1144 #if defined(PETSC_USE_COMPLEX)
1145             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1146               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1147                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1148             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1149               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1150                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1151             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1152               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1153             }
1154 #else
1155             if (a->a[bs2*k + l*bs + j] != 0.0) {
1156               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1157             }
1158 #endif
1159           }
1160         }
1161         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1162       }
1163     }
1164     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1165   } else {
1166     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1167     for (i=0; i<a->mbs; i++) {
1168       for (j=0; j<bs; j++) {
1169         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1170         for (k=a->i[i]; k<a->i[i+1]; k++) {
1171           for (l=0; l<bs; l++) {
1172 #if defined(PETSC_USE_COMPLEX)
1173             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1174               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1175                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1176             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1177               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1178                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1179             } else {
1180               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1181             }
1182 #else
1183             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1184 #endif
1185           }
1186         }
1187         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1188       }
1189     }
1190     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1191   }
1192   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1198 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1199 {
1200   Mat            A = (Mat) Aa;
1201   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1202   PetscErrorCode ierr;
1203   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1204   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1205   MatScalar      *aa;
1206   PetscViewer    viewer;
1207 
1208   PetscFunctionBegin;
1209 
1210   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1211   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1212 
1213   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1214 
1215   /* loop over matrix elements drawing boxes */
1216   color = PETSC_DRAW_BLUE;
1217   for (i=0,row=0; i<mbs; i++,row+=bs) {
1218     for (j=a->i[i]; j<a->i[i+1]; j++) {
1219       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1220       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1221       aa = a->a + j*bs2;
1222       for (k=0; k<bs; k++) {
1223         for (l=0; l<bs; l++) {
1224           if (PetscRealPart(*aa++) >=  0.) continue;
1225           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1226         }
1227       }
1228     }
1229   }
1230   color = PETSC_DRAW_CYAN;
1231   for (i=0,row=0; i<mbs; i++,row+=bs) {
1232     for (j=a->i[i]; j<a->i[i+1]; j++) {
1233       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1234       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1235       aa = a->a + j*bs2;
1236       for (k=0; k<bs; k++) {
1237         for (l=0; l<bs; l++) {
1238           if (PetscRealPart(*aa++) != 0.) continue;
1239           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1240         }
1241       }
1242     }
1243   }
1244 
1245   color = PETSC_DRAW_RED;
1246   for (i=0,row=0; i<mbs; i++,row+=bs) {
1247     for (j=a->i[i]; j<a->i[i+1]; j++) {
1248       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1249       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1250       aa = a->a + j*bs2;
1251       for (k=0; k<bs; k++) {
1252         for (l=0; l<bs; l++) {
1253           if (PetscRealPart(*aa++) <= 0.) continue;
1254           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1255         }
1256       }
1257     }
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1264 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1265 {
1266   PetscErrorCode ierr;
1267   PetscReal      xl,yl,xr,yr,w,h;
1268   PetscDraw      draw;
1269   PetscTruth     isnull;
1270 
1271   PetscFunctionBegin;
1272 
1273   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1274   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1275 
1276   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1277   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1278   xr += w;    yr += h;  xl = -w;     yl = -h;
1279   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1280   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1281   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatView_SeqBAIJ"
1287 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1288 {
1289   PetscErrorCode ierr;
1290   PetscTruth     iascii,isbinary,isdraw;
1291 
1292   PetscFunctionBegin;
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1295   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1296   if (iascii){
1297     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1298   } else if (isbinary) {
1299     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1300   } else if (isdraw) {
1301     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1302   } else {
1303     Mat B;
1304     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1305     ierr = MatView(B,viewer);CHKERRQ(ierr);
1306     ierr = MatDestroy(B);CHKERRQ(ierr);
1307   }
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1314 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1315 {
1316   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1317   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1318   PetscInt    *ai = a->i,*ailen = a->ilen;
1319   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1320   MatScalar   *ap,*aa = a->a,zero = 0.0;
1321 
1322   PetscFunctionBegin;
1323   for (k=0; k<m; k++) { /* loop over rows */
1324     row  = im[k]; brow = row/bs;
1325     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1326     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1327     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1328     nrow = ailen[brow];
1329     for (l=0; l<n; l++) { /* loop over columns */
1330       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1331       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1332       col  = in[l] ;
1333       bcol = col/bs;
1334       cidx = col%bs;
1335       ridx = row%bs;
1336       high = nrow;
1337       low  = 0; /* assume unsorted */
1338       while (high-low > 5) {
1339         t = (low+high)/2;
1340         if (rp[t] > bcol) high = t;
1341         else             low  = t;
1342       }
1343       for (i=low; i<high; i++) {
1344         if (rp[i] > bcol) break;
1345         if (rp[i] == bcol) {
1346           *v++ = ap[bs2*i+bs*cidx+ridx];
1347           goto finished;
1348         }
1349       }
1350       *v++ = zero;
1351       finished:;
1352     }
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #if defined(PETSC_USE_MAT_SINGLE)
1358 #undef __FUNCT__
1359 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1360 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1361 {
1362   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1363   PetscErrorCode ierr;
1364   PetscInt       i,N = m*n*b->bs2;
1365   MatScalar      *vsingle;
1366 
1367   PetscFunctionBegin;
1368   if (N > b->setvalueslen) {
1369     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1370     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1371     b->setvalueslen  = N;
1372   }
1373   vsingle = b->setvaluescopy;
1374   for (i=0; i<N; i++) {
1375     vsingle[i] = v[i];
1376   }
1377   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 #endif
1381 
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1385 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1386 {
1387   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1388   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1389   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1390   PetscErrorCode  ierr;
1391   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1392   PetscTruth      roworiented=a->roworiented;
1393   const MatScalar *value = v;
1394   MatScalar       *ap,*aa = a->a,*bap;
1395 
1396   PetscFunctionBegin;
1397   if (roworiented) {
1398     stepval = (n-1)*bs;
1399   } else {
1400     stepval = (m-1)*bs;
1401   }
1402   for (k=0; k<m; k++) { /* loop over added rows */
1403     row  = im[k];
1404     if (row < 0) continue;
1405 #if defined(PETSC_USE_DEBUG)
1406     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1407 #endif
1408     rp   = aj + ai[row];
1409     ap   = aa + bs2*ai[row];
1410     rmax = imax[row];
1411     nrow = ailen[row];
1412     low  = 0;
1413     high = nrow;
1414     for (l=0; l<n; l++) { /* loop over added columns */
1415       if (in[l] < 0) continue;
1416 #if defined(PETSC_USE_DEBUG)
1417       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1418 #endif
1419       col = in[l];
1420       if (roworiented) {
1421         value = v + k*(stepval+bs)*bs + l*bs;
1422       } else {
1423         value = v + l*(stepval+bs)*bs + k*bs;
1424       }
1425       if (col <= lastcol) low = 0; else high = nrow;
1426       lastcol = col;
1427       while (high-low > 7) {
1428         t = (low+high)/2;
1429         if (rp[t] > col) high = t;
1430         else             low  = t;
1431       }
1432       for (i=low; i<high; i++) {
1433         if (rp[i] > col) break;
1434         if (rp[i] == col) {
1435           bap  = ap +  bs2*i;
1436           if (roworiented) {
1437             if (is == ADD_VALUES) {
1438               for (ii=0; ii<bs; ii++,value+=stepval) {
1439                 for (jj=ii; jj<bs2; jj+=bs) {
1440                   bap[jj] += *value++;
1441                 }
1442               }
1443             } else {
1444               for (ii=0; ii<bs; ii++,value+=stepval) {
1445                 for (jj=ii; jj<bs2; jj+=bs) {
1446                   bap[jj] = *value++;
1447                 }
1448               }
1449             }
1450           } else {
1451             if (is == ADD_VALUES) {
1452               for (ii=0; ii<bs; ii++,value+=stepval) {
1453                 for (jj=0; jj<bs; jj++) {
1454                   *bap++ += *value++;
1455                 }
1456               }
1457             } else {
1458               for (ii=0; ii<bs; ii++,value+=stepval) {
1459                 for (jj=0; jj<bs; jj++) {
1460                   *bap++  = *value++;
1461                 }
1462               }
1463             }
1464           }
1465           goto noinsert2;
1466         }
1467       }
1468       if (nonew == 1) goto noinsert2;
1469       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1470       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1471       N = nrow++ - 1; high++;
1472       /* shift up all the later entries in this row */
1473       for (ii=N; ii>=i; ii--) {
1474         rp[ii+1] = rp[ii];
1475         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1476       }
1477       if (N >= i) {
1478         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1479       }
1480       rp[i] = col;
1481       bap   = ap +  bs2*i;
1482       if (roworiented) {
1483         for (ii=0; ii<bs; ii++,value+=stepval) {
1484           for (jj=ii; jj<bs2; jj+=bs) {
1485             bap[jj] = *value++;
1486           }
1487         }
1488       } else {
1489         for (ii=0; ii<bs; ii++,value+=stepval) {
1490           for (jj=0; jj<bs; jj++) {
1491             *bap++  = *value++;
1492           }
1493         }
1494       }
1495       noinsert2:;
1496       low = i;
1497     }
1498     ailen[row] = nrow;
1499   }
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1505 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1506 {
1507   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1508   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1509   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1510   PetscErrorCode ierr;
1511   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1512   MatScalar      *aa = a->a,*ap;
1513   PetscReal      ratio=0.6;
1514 
1515   PetscFunctionBegin;
1516   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1517 
1518   if (m) rmax = ailen[0];
1519   for (i=1; i<mbs; i++) {
1520     /* move each row back by the amount of empty slots (fshift) before it*/
1521     fshift += imax[i-1] - ailen[i-1];
1522     rmax   = PetscMax(rmax,ailen[i]);
1523     if (fshift) {
1524       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1525       N = ailen[i];
1526       for (j=0; j<N; j++) {
1527         ip[j-fshift] = ip[j];
1528         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1529       }
1530     }
1531     ai[i] = ai[i-1] + ailen[i-1];
1532   }
1533   if (mbs) {
1534     fshift += imax[mbs-1] - ailen[mbs-1];
1535     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1536   }
1537   /* reset ilen and imax for each row */
1538   for (i=0; i<mbs; i++) {
1539     ailen[i] = imax[i] = ai[i+1] - ai[i];
1540   }
1541   a->nz = ai[mbs];
1542 
1543   /* diagonals may have moved, so kill the diagonal pointers */
1544   a->idiagvalid = PETSC_FALSE;
1545   if (fshift && a->diag) {
1546     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1547     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1548     a->diag = 0;
1549   }
1550   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1551   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1552   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1553   a->reallocs          = 0;
1554   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1555 
1556   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1557   if (a->compressedrow.use){
1558     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1559   }
1560 
1561   A->same_nonzero = PETSC_TRUE;
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 /*
1566    This function returns an array of flags which indicate the locations of contiguous
1567    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1568    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1569    Assume: sizes should be long enough to hold all the values.
1570 */
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1573 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1574 {
1575   PetscInt   i,j,k,row;
1576   PetscTruth flg;
1577 
1578   PetscFunctionBegin;
1579   for (i=0,j=0; i<n; j++) {
1580     row = idx[i];
1581     if (row%bs!=0) { /* Not the begining of a block */
1582       sizes[j] = 1;
1583       i++;
1584     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1585       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1586       i++;
1587     } else { /* Begining of the block, so check if the complete block exists */
1588       flg = PETSC_TRUE;
1589       for (k=1; k<bs; k++) {
1590         if (row+k != idx[i+k]) { /* break in the block */
1591           flg = PETSC_FALSE;
1592           break;
1593         }
1594       }
1595       if (flg) { /* No break in the bs */
1596         sizes[j] = bs;
1597         i+= bs;
1598       } else {
1599         sizes[j] = 1;
1600         i++;
1601       }
1602     }
1603   }
1604   *bs_max = j;
1605   PetscFunctionReturn(0);
1606 }
1607 
1608 #undef __FUNCT__
1609 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1610 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1611 {
1612   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1613   PetscErrorCode ierr;
1614   PetscInt       i,j,k,count,*rows;
1615   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1616   PetscScalar    zero = 0.0;
1617   MatScalar      *aa;
1618 
1619   PetscFunctionBegin;
1620   /* Make a copy of the IS and  sort it */
1621   /* allocate memory for rows,sizes */
1622   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1623   sizes = rows + is_n;
1624 
1625   /* copy IS values to rows, and sort them */
1626   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1627   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1628   if (baij->keepzeroedrows) {
1629     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1630     bs_max = is_n;
1631     A->same_nonzero = PETSC_TRUE;
1632   } else {
1633     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1634     A->same_nonzero = PETSC_FALSE;
1635   }
1636 
1637   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1638     row   = rows[j];
1639     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1640     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1641     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1642     if (sizes[i] == bs && !baij->keepzeroedrows) {
1643       if (diag != 0.0) {
1644         if (baij->ilen[row/bs] > 0) {
1645           baij->ilen[row/bs]       = 1;
1646           baij->j[baij->i[row/bs]] = row/bs;
1647           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1648         }
1649         /* Now insert all the diagonal values for this bs */
1650         for (k=0; k<bs; k++) {
1651           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1652         }
1653       } else { /* (diag == 0.0) */
1654         baij->ilen[row/bs] = 0;
1655       } /* end (diag == 0.0) */
1656     } else { /* (sizes[i] != bs) */
1657 #if defined (PETSC_USE_DEBUG)
1658       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1659 #endif
1660       for (k=0; k<count; k++) {
1661         aa[0] =  zero;
1662         aa    += bs;
1663       }
1664       if (diag != 0.0) {
1665         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1666       }
1667     }
1668   }
1669 
1670   ierr = PetscFree(rows);CHKERRQ(ierr);
1671   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1672   PetscFunctionReturn(0);
1673 }
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1677 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1678 {
1679   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1680   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1681   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1682   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1683   PetscErrorCode ierr;
1684   PetscInt       ridx,cidx,bs2=a->bs2;
1685   PetscTruth     roworiented=a->roworiented;
1686   MatScalar      *ap,value,*aa=a->a,*bap;
1687 
1688   PetscFunctionBegin;
1689   for (k=0; k<m; k++) { /* loop over added rows */
1690     row  = im[k];
1691     brow = row/bs;
1692     if (row < 0) continue;
1693 #if defined(PETSC_USE_DEBUG)
1694     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1695 #endif
1696     rp   = aj + ai[brow];
1697     ap   = aa + bs2*ai[brow];
1698     rmax = imax[brow];
1699     nrow = ailen[brow];
1700     low  = 0;
1701     high = nrow;
1702     for (l=0; l<n; l++) { /* loop over added columns */
1703       if (in[l] < 0) continue;
1704 #if defined(PETSC_USE_DEBUG)
1705       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1706 #endif
1707       col = in[l]; bcol = col/bs;
1708       ridx = row % bs; cidx = col % bs;
1709       if (roworiented) {
1710         value = v[l + k*n];
1711       } else {
1712         value = v[k + l*m];
1713       }
1714       if (col <= lastcol) low = 0; else high = nrow;
1715       lastcol = col;
1716       while (high-low > 7) {
1717         t = (low+high)/2;
1718         if (rp[t] > bcol) high = t;
1719         else              low  = t;
1720       }
1721       for (i=low; i<high; i++) {
1722         if (rp[i] > bcol) break;
1723         if (rp[i] == bcol) {
1724           bap  = ap +  bs2*i + bs*cidx + ridx;
1725           if (is == ADD_VALUES) *bap += value;
1726           else                  *bap  = value;
1727           goto noinsert1;
1728         }
1729       }
1730       if (nonew == 1) goto noinsert1;
1731       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1732       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1733       N = nrow++ - 1; high++;
1734       /* shift up all the later entries in this row */
1735       for (ii=N; ii>=i; ii--) {
1736         rp[ii+1] = rp[ii];
1737         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1738       }
1739       if (N>=i) {
1740         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1741       }
1742       rp[i]                      = bcol;
1743       ap[bs2*i + bs*cidx + ridx] = value;
1744       a->nz++;
1745       noinsert1:;
1746       low = i;
1747     }
1748     ailen[brow] = nrow;
1749   }
1750   A->same_nonzero = PETSC_FALSE;
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1757 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1758 {
1759   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1760   Mat            outA;
1761   PetscErrorCode ierr;
1762   PetscTruth     row_identity,col_identity;
1763 
1764   PetscFunctionBegin;
1765   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1766   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1767   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1768   if (!row_identity || !col_identity) {
1769     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1770   }
1771 
1772   outA          = inA;
1773   inA->factor   = FACTOR_LU;
1774 
1775   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1776 
1777   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1778   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
1779   a->row = row;
1780   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1781   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
1782   a->col = col;
1783 
1784   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1785   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1786   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1787 
1788   /*
1789       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1790       for ILU(0) factorization with natural ordering
1791   */
1792   if (inA->rmap.bs < 8) {
1793     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1794   } else {
1795     if (!a->solve_work) {
1796       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1797       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1798     }
1799   }
1800 
1801   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1802 
1803   PetscFunctionReturn(0);
1804 }
1805 
1806 EXTERN_C_BEGIN
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1809 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1810 {
1811   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1812   PetscInt    i,nz,nbs;
1813 
1814   PetscFunctionBegin;
1815   nz  = baij->maxnz/baij->bs2;
1816   nbs = baij->nbs;
1817   for (i=0; i<nz; i++) {
1818     baij->j[i] = indices[i];
1819   }
1820   baij->nz = nz;
1821   for (i=0; i<nbs; i++) {
1822     baij->ilen[i] = baij->imax[i];
1823   }
1824 
1825   PetscFunctionReturn(0);
1826 }
1827 EXTERN_C_END
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1831 /*@
1832     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1833        in the matrix.
1834 
1835   Input Parameters:
1836 +  mat - the SeqBAIJ matrix
1837 -  indices - the column indices
1838 
1839   Level: advanced
1840 
1841   Notes:
1842     This can be called if you have precomputed the nonzero structure of the
1843   matrix and want to provide it to the matrix object to improve the performance
1844   of the MatSetValues() operation.
1845 
1846     You MUST have set the correct numbers of nonzeros per row in the call to
1847   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1848 
1849     MUST be called before any calls to MatSetValues();
1850 
1851 @*/
1852 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1853 {
1854   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1855 
1856   PetscFunctionBegin;
1857   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1858   PetscValidPointer(indices,2);
1859   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1860   if (f) {
1861     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1862   } else {
1863     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1864   }
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1870 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1871 {
1872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1873   PetscErrorCode ierr;
1874   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1875   PetscReal      atmp;
1876   PetscScalar    *x,zero = 0.0;
1877   MatScalar      *aa;
1878   PetscInt       ncols,brow,krow,kcol;
1879 
1880   PetscFunctionBegin;
1881   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1882   bs   = A->rmap.bs;
1883   aa   = a->a;
1884   ai   = a->i;
1885   aj   = a->j;
1886   mbs = a->mbs;
1887 
1888   ierr = VecSet(v,zero);CHKERRQ(ierr);
1889   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1890   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1891   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1892   for (i=0; i<mbs; i++) {
1893     ncols = ai[1] - ai[0]; ai++;
1894     brow  = bs*i;
1895     for (j=0; j<ncols; j++){
1896       /* bcol = bs*(*aj); */
1897       for (kcol=0; kcol<bs; kcol++){
1898         for (krow=0; krow<bs; krow++){
1899           atmp = PetscAbsScalar(*aa); aa++;
1900           row = brow + krow;    /* row index */
1901           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1902           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1903         }
1904       }
1905       aj++;
1906     }
1907   }
1908   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1909   PetscFunctionReturn(0);
1910 }
1911 
1912 #undef __FUNCT__
1913 #define __FUNCT__ "MatCopy_SeqBAIJ"
1914 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1915 {
1916   PetscErrorCode ierr;
1917 
1918   PetscFunctionBegin;
1919   /* If the two matrices have the same copy implementation, use fast copy. */
1920   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1921     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1922     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1923 
1924     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1925       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1926     }
1927     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1928   } else {
1929     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1930   }
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1936 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1937 {
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 #undef __FUNCT__
1946 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1947 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1948 {
1949   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1950   PetscFunctionBegin;
1951   *array = a->a;
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 #undef __FUNCT__
1956 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1957 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1958 {
1959   PetscFunctionBegin;
1960   PetscFunctionReturn(0);
1961 }
1962 
1963 #include "petscblaslapack.h"
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1966 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1967 {
1968   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1969   PetscErrorCode ierr;
1970   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1971   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1972 
1973   PetscFunctionBegin;
1974   if (str == SAME_NONZERO_PATTERN) {
1975     PetscScalar alpha = a;
1976     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1977   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1978     if (y->xtoy && y->XtoY != X) {
1979       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1980       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1981     }
1982     if (!y->xtoy) { /* get xtoy */
1983       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1984       y->XtoY = X;
1985     }
1986     bs2 = bs*bs;
1987     for (i=0; i<x->nz; i++) {
1988       j = 0;
1989       while (j < bs2){
1990         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1991         j++;
1992       }
1993     }
1994     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1995   } else {
1996     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1997   }
1998   PetscFunctionReturn(0);
1999 }
2000 
2001 #undef __FUNCT__
2002 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2003 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2004 {
2005   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2006   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2007   PetscScalar    *aa = a->a;
2008 
2009   PetscFunctionBegin;
2010   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2011   PetscFunctionReturn(0);
2012 }
2013 
2014 #undef __FUNCT__
2015 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2016 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2017 {
2018   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2019   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2020   PetscScalar    *aa = a->a;
2021 
2022   PetscFunctionBegin;
2023   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2024   PetscFunctionReturn(0);
2025 }
2026 
2027 
2028 /* -------------------------------------------------------------------*/
2029 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2030        MatGetRow_SeqBAIJ,
2031        MatRestoreRow_SeqBAIJ,
2032        MatMult_SeqBAIJ_N,
2033 /* 4*/ MatMultAdd_SeqBAIJ_N,
2034        MatMultTranspose_SeqBAIJ,
2035        MatMultTransposeAdd_SeqBAIJ,
2036        MatSolve_SeqBAIJ_N,
2037        0,
2038        0,
2039 /*10*/ 0,
2040        MatLUFactor_SeqBAIJ,
2041        0,
2042        0,
2043        MatTranspose_SeqBAIJ,
2044 /*15*/ MatGetInfo_SeqBAIJ,
2045        MatEqual_SeqBAIJ,
2046        MatGetDiagonal_SeqBAIJ,
2047        MatDiagonalScale_SeqBAIJ,
2048        MatNorm_SeqBAIJ,
2049 /*20*/ 0,
2050        MatAssemblyEnd_SeqBAIJ,
2051        0,
2052        MatSetOption_SeqBAIJ,
2053        MatZeroEntries_SeqBAIJ,
2054 /*25*/ MatZeroRows_SeqBAIJ,
2055        MatLUFactorSymbolic_SeqBAIJ,
2056        MatLUFactorNumeric_SeqBAIJ_N,
2057        MatCholeskyFactorSymbolic_SeqBAIJ,
2058        MatCholeskyFactorNumeric_SeqBAIJ_N,
2059 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2060        MatILUFactorSymbolic_SeqBAIJ,
2061        MatICCFactorSymbolic_SeqBAIJ,
2062        MatGetArray_SeqBAIJ,
2063        MatRestoreArray_SeqBAIJ,
2064 /*35*/ MatDuplicate_SeqBAIJ,
2065        0,
2066        0,
2067        MatILUFactor_SeqBAIJ,
2068        0,
2069 /*40*/ MatAXPY_SeqBAIJ,
2070        MatGetSubMatrices_SeqBAIJ,
2071        MatIncreaseOverlap_SeqBAIJ,
2072        MatGetValues_SeqBAIJ,
2073        MatCopy_SeqBAIJ,
2074 /*45*/ 0,
2075        MatScale_SeqBAIJ,
2076        0,
2077        0,
2078        0,
2079 /*50*/ 0,
2080        MatGetRowIJ_SeqBAIJ,
2081        MatRestoreRowIJ_SeqBAIJ,
2082        0,
2083        0,
2084 /*55*/ 0,
2085        0,
2086        0,
2087        0,
2088        MatSetValuesBlocked_SeqBAIJ,
2089 /*60*/ MatGetSubMatrix_SeqBAIJ,
2090        MatDestroy_SeqBAIJ,
2091        MatView_SeqBAIJ,
2092        0,
2093        0,
2094 /*65*/ 0,
2095        0,
2096        0,
2097        0,
2098        0,
2099 /*70*/ MatGetRowMax_SeqBAIJ,
2100        MatConvert_Basic,
2101        0,
2102        0,
2103        0,
2104 /*75*/ 0,
2105        0,
2106        0,
2107        0,
2108        0,
2109 /*80*/ 0,
2110        0,
2111        0,
2112        0,
2113        MatLoad_SeqBAIJ,
2114 /*85*/ 0,
2115        0,
2116        0,
2117        0,
2118        0,
2119 /*90*/ 0,
2120        0,
2121        0,
2122        0,
2123        0,
2124 /*95*/ 0,
2125        0,
2126        0,
2127        0,
2128        0,
2129 /*100*/0,
2130        0,
2131        0,
2132        0,
2133        0,
2134 /*105*/0,
2135        MatRealPart_SeqBAIJ,
2136        MatImaginaryPart_SeqBAIJ
2137 };
2138 
2139 EXTERN_C_BEGIN
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2142 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2143 {
2144   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2145   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2146   PetscErrorCode ierr;
2147 
2148   PetscFunctionBegin;
2149   if (aij->nonew != 1) {
2150     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2151   }
2152 
2153   /* allocate space for values if not already there */
2154   if (!aij->saved_values) {
2155     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2156   }
2157 
2158   /* copy values over */
2159   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2160   PetscFunctionReturn(0);
2161 }
2162 EXTERN_C_END
2163 
2164 EXTERN_C_BEGIN
2165 #undef __FUNCT__
2166 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2167 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2168 {
2169   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2170   PetscErrorCode ierr;
2171   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2172 
2173   PetscFunctionBegin;
2174   if (aij->nonew != 1) {
2175     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2176   }
2177   if (!aij->saved_values) {
2178     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2179   }
2180 
2181   /* copy values over */
2182   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2183   PetscFunctionReturn(0);
2184 }
2185 EXTERN_C_END
2186 
2187 EXTERN_C_BEGIN
2188 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2189 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2190 EXTERN_C_END
2191 
2192 EXTERN_C_BEGIN
2193 #undef __FUNCT__
2194 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2195 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2196 {
2197   Mat_SeqBAIJ    *b;
2198   PetscErrorCode ierr;
2199   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2200   PetscTruth     flg,skipallocation = PETSC_FALSE;
2201 
2202   PetscFunctionBegin;
2203 
2204   if (nz == MAT_SKIP_ALLOCATION) {
2205     skipallocation = PETSC_TRUE;
2206     nz             = 0;
2207   }
2208 
2209   ierr = PetscOptionsBegin(B->comm,B->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2210     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2211   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2212 
2213   if (nnz && newbs != bs) {
2214     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2215   }
2216   bs   = newbs;
2217 
2218   B->rmap.bs = B->cmap.bs = bs;
2219   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2220   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2221 
2222   B->preallocated = PETSC_TRUE;
2223 
2224   mbs  = B->rmap.n/bs;
2225   nbs  = B->cmap.n/bs;
2226   bs2  = bs*bs;
2227 
2228   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2229     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2230   }
2231 
2232   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2233   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2234   if (nnz) {
2235     for (i=0; i<mbs; i++) {
2236       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2237       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2238     }
2239   }
2240 
2241   b       = (Mat_SeqBAIJ*)B->data;
2242   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2243     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2244   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2245 
2246   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2247   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2248   if (!flg) {
2249     switch (bs) {
2250     case 1:
2251       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2252       B->ops->mult            = MatMult_SeqBAIJ_1;
2253       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2254       break;
2255     case 2:
2256       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2257       B->ops->mult            = MatMult_SeqBAIJ_2;
2258       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2259       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2260       break;
2261     case 3:
2262       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2263       B->ops->mult            = MatMult_SeqBAIJ_3;
2264       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2265       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2266       break;
2267     case 4:
2268       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2269       B->ops->mult            = MatMult_SeqBAIJ_4;
2270       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2271       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2272       break;
2273     case 5:
2274       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2275       B->ops->mult            = MatMult_SeqBAIJ_5;
2276       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2277       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2278       break;
2279     case 6:
2280       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2281       B->ops->mult            = MatMult_SeqBAIJ_6;
2282       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2283       break;
2284     case 7:
2285       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2286       B->ops->mult            = MatMult_SeqBAIJ_7;
2287       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2288       break;
2289     default:
2290       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2291       B->ops->mult            = MatMult_SeqBAIJ_N;
2292       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2293       break;
2294     }
2295   }
2296   B->rmap.bs      = bs;
2297   b->mbs     = mbs;
2298   b->nbs     = nbs;
2299   if (!skipallocation) {
2300     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2301     /* b->ilen will count nonzeros in each block row so far. */
2302     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2303     if (!nnz) {
2304       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2305       else if (nz <= 0)        nz = 1;
2306       for (i=0; i<mbs; i++) b->imax[i] = nz;
2307       nz = nz*mbs;
2308     } else {
2309       nz = 0;
2310       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2311     }
2312 
2313     /* allocate the matrix space */
2314     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2315     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2316     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2317     b->singlemalloc = PETSC_TRUE;
2318 
2319     b->i[0] = 0;
2320     for (i=1; i<mbs+1; i++) {
2321       b->i[i] = b->i[i-1] + b->imax[i-1];
2322     }
2323     b->free_a     = PETSC_TRUE;
2324     b->free_ij    = PETSC_TRUE;
2325   } else {
2326     b->free_a     = PETSC_FALSE;
2327     b->free_ij    = PETSC_FALSE;
2328   }
2329 
2330   B->rmap.bs          = bs;
2331   b->bs2              = bs2;
2332   b->mbs              = mbs;
2333   b->nz               = 0;
2334   b->maxnz            = nz*bs2;
2335   B->info.nz_unneeded = (PetscReal)b->maxnz;
2336   PetscFunctionReturn(0);
2337 }
2338 EXTERN_C_END
2339 
2340 /*MC
2341    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2342    block sparse compressed row format.
2343 
2344    Options Database Keys:
2345 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2346 
2347   Level: beginner
2348 
2349 .seealso: MatCreateSeqBAIJ()
2350 M*/
2351 
2352 EXTERN_C_BEGIN
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatCreate_SeqBAIJ"
2355 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2356 {
2357   PetscErrorCode ierr;
2358   PetscMPIInt    size;
2359   Mat_SeqBAIJ    *b;
2360 
2361   PetscFunctionBegin;
2362   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2363   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2364 
2365   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2366   B->data = (void*)b;
2367   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2368   B->factor           = 0;
2369   B->mapping          = 0;
2370   b->row              = 0;
2371   b->col              = 0;
2372   b->icol             = 0;
2373   b->reallocs         = 0;
2374   b->saved_values     = 0;
2375 #if defined(PETSC_USE_MAT_SINGLE)
2376   b->setvalueslen     = 0;
2377   b->setvaluescopy    = PETSC_NULL;
2378 #endif
2379 
2380   b->sorted           = PETSC_FALSE;
2381   b->roworiented      = PETSC_TRUE;
2382   b->nonew            = 0;
2383   b->diag             = 0;
2384   b->solve_work       = 0;
2385   b->mult_work        = 0;
2386   B->spptr            = 0;
2387   B->info.nz_unneeded = (PetscReal)b->maxnz;
2388   b->keepzeroedrows   = PETSC_FALSE;
2389   b->xtoy              = 0;
2390   b->XtoY              = 0;
2391   b->compressedrow.use     = PETSC_FALSE;
2392   b->compressedrow.nrows   = 0;
2393   b->compressedrow.i       = PETSC_NULL;
2394   b->compressedrow.rindex  = PETSC_NULL;
2395   b->compressedrow.checked = PETSC_FALSE;
2396   B->same_nonzero          = PETSC_FALSE;
2397 
2398   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2399                                      "MatInvertBlockDiagonal_SeqBAIJ",
2400                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2401   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2402                                      "MatStoreValues_SeqBAIJ",
2403                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2405                                      "MatRetrieveValues_SeqBAIJ",
2406                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2407   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2408                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2409                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2411                                      "MatConvert_SeqBAIJ_SeqAIJ",
2412                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2414                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2415                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2417                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2418                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2419   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 EXTERN_C_END
2423 
2424 #undef __FUNCT__
2425 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2426 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2427 {
2428   Mat            C;
2429   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2430   PetscErrorCode ierr;
2431   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2432 
2433   PetscFunctionBegin;
2434   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2435 
2436   *B = 0;
2437   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2438   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2439   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2440   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2441   c    = (Mat_SeqBAIJ*)C->data;
2442 
2443   C->rmap.N   = A->rmap.N;
2444   C->cmap.N   = A->cmap.N;
2445   C->rmap.bs  = A->rmap.bs;
2446   c->bs2 = a->bs2;
2447   c->mbs = a->mbs;
2448   c->nbs = a->nbs;
2449 
2450   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2451   for (i=0; i<mbs; i++) {
2452     c->imax[i] = a->imax[i];
2453     c->ilen[i] = a->ilen[i];
2454   }
2455 
2456   /* allocate the matrix space */
2457   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2458   c->singlemalloc = PETSC_TRUE;
2459   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2460   if (mbs > 0) {
2461     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2462     if (cpvalues == MAT_COPY_VALUES) {
2463       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2464     } else {
2465       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2466     }
2467   }
2468   c->sorted      = a->sorted;
2469   c->roworiented = a->roworiented;
2470   c->nonew       = a->nonew;
2471 
2472   if (a->diag) {
2473     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2474     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2475     for (i=0; i<mbs; i++) {
2476       c->diag[i] = a->diag[i];
2477     }
2478   } else c->diag        = 0;
2479   c->nz                 = a->nz;
2480   c->maxnz              = a->maxnz;
2481   c->solve_work         = 0;
2482   c->mult_work          = 0;
2483   c->free_a             = PETSC_TRUE;
2484   c->free_ij            = PETSC_TRUE;
2485   C->preallocated       = PETSC_TRUE;
2486   C->assembled          = PETSC_TRUE;
2487 
2488   c->compressedrow.use     = a->compressedrow.use;
2489   c->compressedrow.nrows   = a->compressedrow.nrows;
2490   c->compressedrow.checked = a->compressedrow.checked;
2491   if ( a->compressedrow.checked && a->compressedrow.use){
2492     i = a->compressedrow.nrows;
2493     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2494     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2495     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2496     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2497   } else {
2498     c->compressedrow.use    = PETSC_FALSE;
2499     c->compressedrow.i      = PETSC_NULL;
2500     c->compressedrow.rindex = PETSC_NULL;
2501   }
2502   C->same_nonzero = A->same_nonzero;
2503   *B = C;
2504   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2505   PetscFunctionReturn(0);
2506 }
2507 
2508 #undef __FUNCT__
2509 #define __FUNCT__ "MatLoad_SeqBAIJ"
2510 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2511 {
2512   Mat_SeqBAIJ    *a;
2513   Mat            B;
2514   PetscErrorCode ierr;
2515   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2516   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2517   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2518   PetscInt       *masked,nmask,tmp,bs2,ishift;
2519   PetscMPIInt    size;
2520   int            fd;
2521   PetscScalar    *aa;
2522   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2523 
2524   PetscFunctionBegin;
2525   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2526     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2527   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2528   bs2  = bs*bs;
2529 
2530   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2531   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2532   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2533   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2534   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2535   M = header[1]; N = header[2]; nz = header[3];
2536 
2537   if (header[3] < 0) {
2538     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2539   }
2540 
2541   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2542 
2543   /*
2544      This code adds extra rows to make sure the number of rows is
2545     divisible by the blocksize
2546   */
2547   mbs        = M/bs;
2548   extra_rows = bs - M + bs*(mbs);
2549   if (extra_rows == bs) extra_rows = 0;
2550   else                  mbs++;
2551   if (extra_rows) {
2552     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2553   }
2554 
2555   /* read in row lengths */
2556   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2557   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2558   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2559 
2560   /* read in column indices */
2561   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2562   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2563   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2564 
2565   /* loop over row lengths determining block row lengths */
2566   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2567   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2568   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2569   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2570   masked   = mask + mbs;
2571   rowcount = 0; nzcount = 0;
2572   for (i=0; i<mbs; i++) {
2573     nmask = 0;
2574     for (j=0; j<bs; j++) {
2575       kmax = rowlengths[rowcount];
2576       for (k=0; k<kmax; k++) {
2577         tmp = jj[nzcount++]/bs;
2578         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2579       }
2580       rowcount++;
2581     }
2582     browlengths[i] += nmask;
2583     /* zero out the mask elements we set */
2584     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2585   }
2586 
2587   /* create our matrix */
2588   ierr = MatCreate(comm,&B);
2589   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2590   ierr = MatSetType(B,type);CHKERRQ(ierr);
2591   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2592   a = (Mat_SeqBAIJ*)B->data;
2593 
2594   /* set matrix "i" values */
2595   a->i[0] = 0;
2596   for (i=1; i<= mbs; i++) {
2597     a->i[i]      = a->i[i-1] + browlengths[i-1];
2598     a->ilen[i-1] = browlengths[i-1];
2599   }
2600   a->nz         = 0;
2601   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2602 
2603   /* read in nonzero values */
2604   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2605   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2606   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2607 
2608   /* set "a" and "j" values into matrix */
2609   nzcount = 0; jcount = 0;
2610   for (i=0; i<mbs; i++) {
2611     nzcountb = nzcount;
2612     nmask    = 0;
2613     for (j=0; j<bs; j++) {
2614       kmax = rowlengths[i*bs+j];
2615       for (k=0; k<kmax; k++) {
2616         tmp = jj[nzcount++]/bs;
2617 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2618       }
2619     }
2620     /* sort the masked values */
2621     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2622 
2623     /* set "j" values into matrix */
2624     maskcount = 1;
2625     for (j=0; j<nmask; j++) {
2626       a->j[jcount++]  = masked[j];
2627       mask[masked[j]] = maskcount++;
2628     }
2629     /* set "a" values into matrix */
2630     ishift = bs2*a->i[i];
2631     for (j=0; j<bs; j++) {
2632       kmax = rowlengths[i*bs+j];
2633       for (k=0; k<kmax; k++) {
2634         tmp       = jj[nzcountb]/bs ;
2635         block     = mask[tmp] - 1;
2636         point     = jj[nzcountb] - bs*tmp;
2637         idx       = ishift + bs2*block + j + bs*point;
2638         a->a[idx] = (MatScalar)aa[nzcountb++];
2639       }
2640     }
2641     /* zero out the mask elements we set */
2642     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2643   }
2644   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2645 
2646   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2647   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2648   ierr = PetscFree(aa);CHKERRQ(ierr);
2649   ierr = PetscFree(jj);CHKERRQ(ierr);
2650   ierr = PetscFree(mask);CHKERRQ(ierr);
2651 
2652   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2653   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2654   ierr = MatView_Private(B);CHKERRQ(ierr);
2655 
2656   *A = B;
2657   PetscFunctionReturn(0);
2658 }
2659 
2660 #undef __FUNCT__
2661 #define __FUNCT__ "MatCreateSeqBAIJ"
2662 /*@C
2663    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2664    compressed row) format.  For good matrix assembly performance the
2665    user should preallocate the matrix storage by setting the parameter nz
2666    (or the array nnz).  By setting these parameters accurately, performance
2667    during matrix assembly can be increased by more than a factor of 50.
2668 
2669    Collective on MPI_Comm
2670 
2671    Input Parameters:
2672 +  comm - MPI communicator, set to PETSC_COMM_SELF
2673 .  bs - size of block
2674 .  m - number of rows
2675 .  n - number of columns
2676 .  nz - number of nonzero blocks  per block row (same for all rows)
2677 -  nnz - array containing the number of nonzero blocks in the various block rows
2678          (possibly different for each block row) or PETSC_NULL
2679 
2680    Output Parameter:
2681 .  A - the matrix
2682 
2683    Options Database Keys:
2684 .   -mat_no_unroll - uses code that does not unroll the loops in the
2685                      block calculations (much slower)
2686 .    -mat_block_size - size of the blocks to use
2687 
2688    Level: intermediate
2689 
2690    Notes:
2691    The number of rows and columns must be divisible by blocksize.
2692 
2693    If the nnz parameter is given then the nz parameter is ignored
2694 
2695    A nonzero block is any block that as 1 or more nonzeros in it
2696 
2697    The block AIJ format is fully compatible with standard Fortran 77
2698    storage.  That is, the stored row and column indices can begin at
2699    either one (as in Fortran) or zero.  See the users' manual for details.
2700 
2701    Specify the preallocated storage with either nz or nnz (not both).
2702    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2703    allocation.  For additional details, see the users manual chapter on
2704    matrices.
2705 
2706 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2707 @*/
2708 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2709 {
2710   PetscErrorCode ierr;
2711 
2712   PetscFunctionBegin;
2713   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2714   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2715   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2716   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2717   PetscFunctionReturn(0);
2718 }
2719 
2720 #undef __FUNCT__
2721 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2722 /*@C
2723    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2724    per row in the matrix. For good matrix assembly performance the
2725    user should preallocate the matrix storage by setting the parameter nz
2726    (or the array nnz).  By setting these parameters accurately, performance
2727    during matrix assembly can be increased by more than a factor of 50.
2728 
2729    Collective on MPI_Comm
2730 
2731    Input Parameters:
2732 +  A - the matrix
2733 .  bs - size of block
2734 .  nz - number of block nonzeros per block row (same for all rows)
2735 -  nnz - array containing the number of block nonzeros in the various block rows
2736          (possibly different for each block row) or PETSC_NULL
2737 
2738    Options Database Keys:
2739 .   -mat_no_unroll - uses code that does not unroll the loops in the
2740                      block calculations (much slower)
2741 .    -mat_block_size - size of the blocks to use
2742 
2743    Level: intermediate
2744 
2745    Notes:
2746    If the nnz parameter is given then the nz parameter is ignored
2747 
2748    The block AIJ format is fully compatible with standard Fortran 77
2749    storage.  That is, the stored row and column indices can begin at
2750    either one (as in Fortran) or zero.  See the users' manual for details.
2751 
2752    Specify the preallocated storage with either nz or nnz (not both).
2753    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2754    allocation.  For additional details, see the users manual chapter on
2755    matrices.
2756 
2757 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2758 @*/
2759 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2760 {
2761   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2762 
2763   PetscFunctionBegin;
2764   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2765   if (f) {
2766     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2767   }
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2773 /*@
2774      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2775               (upper triangular entries in CSR format) provided by the user.
2776 
2777      Collective on MPI_Comm
2778 
2779    Input Parameters:
2780 +  comm - must be an MPI communicator of size 1
2781 .  bs - size of block
2782 .  m - number of rows
2783 .  n - number of columns
2784 .  i - row indices
2785 .  j - column indices
2786 -  a - matrix values
2787 
2788    Output Parameter:
2789 .  mat - the matrix
2790 
2791    Level: intermediate
2792 
2793    Notes:
2794        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2795     once the matrix is destroyed
2796 
2797        You cannot set new nonzero locations into this matrix, that will generate an error.
2798 
2799        The i and j indices are 0 based
2800 
2801 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2802 
2803 @*/
2804 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2805 {
2806   PetscErrorCode ierr;
2807   PetscInt       ii;
2808   Mat_SeqBAIJ    *baij;
2809 
2810   PetscFunctionBegin;
2811   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2812   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2813 
2814   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2815   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2816   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2817   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2818   baij = (Mat_SeqBAIJ*)(*mat)->data;
2819   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2820 
2821   baij->i = i;
2822   baij->j = j;
2823   baij->a = a;
2824   baij->singlemalloc = PETSC_FALSE;
2825   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2826   baij->free_a       = PETSC_FALSE;
2827   baij->free_ij       = PETSC_FALSE;
2828 
2829   for (ii=0; ii<m; ii++) {
2830     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2831 #if defined(PETSC_USE_DEBUG)
2832     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2833 #endif
2834   }
2835 #if defined(PETSC_USE_DEBUG)
2836   for (ii=0; ii<baij->i[m]; ii++) {
2837     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2838     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2839   }
2840 #endif
2841 
2842   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2843   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2844   PetscFunctionReturn(0);
2845 }
2846