xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 290bbb0a1dcfb34dbf94efcfcc44171581b0efea)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "src/mat/impls/baij/seq/baij.h"
8 #include "src/inline/spops.h"
9 #include "petscsys.h"                     /*I "petscmat.h" I*/
10 
11 #include "src/inline/ilu.h"
12 
13 #undef __FUNCT__
14 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
15 /*@C
16   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
17 
18   Collective on Mat
19 
20   Input Parameters:
21 . mat - the matrix
22 
23   Level: advanced
24 @*/
25 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
26 {
27   PetscErrorCode ierr,(*f)(Mat);
28 
29   PetscFunctionBegin;
30   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
31   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
32   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
33 
34   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
35   if (f) {
36     ierr = (*f)(mat);CHKERRQ(ierr);
37   } else {
38     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
39   }
40   PetscFunctionReturn(0);
41 }
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
46 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
47 {
48   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
49   PetscErrorCode ierr;
50   PetscInt       *diag_offset,i,bs = A->rmap.bs,mbs = a->mbs;
51   PetscScalar    *v = a->a,*odiag,*diag,*mdiag;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     default:
109       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
110   }
111   a->idiagvalid = PETSC_TRUE;
112   PetscFunctionReturn(0);
113 }
114 EXTERN_C_END
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
118 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
119 {
120   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
121   PetscScalar        *x,x1,x2,s1,s2;
122   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
123   PetscErrorCode     ierr;
124   PetscInt           m = a->mbs,i,i2,nz,idx;
125   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
126 
127   PetscFunctionBegin;
128   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
129   its = its*lits;
130   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
131   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
132   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
133   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
134   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
135 
136   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
137 
138   diag  = a->diag;
139   idiag = a->idiag;
140   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
141   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
142 
143   if (flag & SOR_ZERO_INITIAL_GUESS) {
144     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
145       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
146       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
147       i2     = 2;
148       idiag += 4;
149       for (i=1; i<m; i++) {
150 	v     = aa + 4*ai[i];
151 	vi    = aj + ai[i];
152 	nz    = diag[i] - ai[i];
153 	s1    = b[i2]; s2 = b[i2+1];
154 	while (nz--) {
155 	  idx  = 2*(*vi++);
156 	  x1   = x[idx]; x2 = x[1+idx];
157 	  s1  -= v[0]*x1 + v[2]*x2;
158 	  s2  -= v[1]*x1 + v[3]*x2;
159 	  v   += 4;
160 	}
161 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
162 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
163         idiag   += 4;
164         i2      += 2;
165       }
166       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
167       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
168     }
169     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
170         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
171       i2    = 0;
172       mdiag = a->idiag+4*a->mbs;
173       for (i=0; i<m; i++) {
174         x1      = x[i2]; x2 = x[i2+1];
175         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
176         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
177         mdiag  += 4;
178         i2     += 2;
179       }
180       ierr = PetscLogFlops(6*m);CHKERRQ(ierr);
181     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
182       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
183     }
184     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
185       idiag   = a->idiag+4*a->mbs - 4;
186       i2      = 2*m - 2;
187       x1      = x[i2]; x2 = x[i2+1];
188       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
189       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
190       idiag -= 4;
191       i2    -= 2;
192       for (i=m-2; i>=0; i--) {
193 	v     = aa + 4*(diag[i]+1);
194 	vi    = aj + diag[i] + 1;
195 	nz    = ai[i+1] - diag[i] - 1;
196 	s1    = x[i2]; s2 = x[i2+1];
197 	while (nz--) {
198 	  idx  = 2*(*vi++);
199 	  x1   = x[idx]; x2 = x[1+idx];
200 	  s1  -= v[0]*x1 + v[2]*x2;
201 	  s2  -= v[1]*x1 + v[3]*x2;
202 	  v   += 4;
203 	}
204 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
205 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
206         idiag   -= 4;
207         i2      -= 2;
208       }
209       ierr = PetscLogFlops(4*(a->nz));CHKERRQ(ierr);
210     }
211   } else {
212     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
213   }
214   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
215   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
216   PetscFunctionReturn(0);
217 }
218 
219 #undef __FUNCT__
220 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
221 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
222 {
223   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
224   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
225   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   its = its*lits;
232   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
233   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
234   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
235   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
236   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
237 
238   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
239 
240   diag  = a->diag;
241   idiag = a->idiag;
242   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
243   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
244 
245   if (flag & SOR_ZERO_INITIAL_GUESS) {
246     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
247       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
248       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
249       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
250       i2     = 3;
251       idiag += 9;
252       for (i=1; i<m; i++) {
253 	v     = aa + 9*ai[i];
254 	vi    = aj + ai[i];
255 	nz    = diag[i] - ai[i];
256 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
257 	while (nz--) {
258 	  idx  = 3*(*vi++);
259 	  x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
260 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
261 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
262 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
263 	  v   += 9;
264 	}
265 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
266 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
267 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
268         idiag   += 9;
269         i2      += 3;
270       }
271       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
272       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
273     }
274     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
275         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
276       i2    = 0;
277       mdiag = a->idiag+9*a->mbs;
278       for (i=0; i<m; i++) {
279         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
280         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
281         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
282         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
283         mdiag  += 9;
284         i2     += 3;
285       }
286       ierr = PetscLogFlops(15*m);CHKERRQ(ierr);
287     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
289     }
290     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
291       idiag   = a->idiag+9*a->mbs - 9;
292       i2      = 3*m - 3;
293       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
294       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
295       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
296       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
297       idiag -= 9;
298       i2    -= 3;
299       for (i=m-2; i>=0; i--) {
300 	v     = aa + 9*(diag[i]+1);
301 	vi    = aj + diag[i] + 1;
302 	nz    = ai[i+1] - diag[i] - 1;
303 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
304 	while (nz--) {
305 	  idx  = 3*(*vi++);
306 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
307 	  s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
308 	  s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
309 	  s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
310 	  v   += 9;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
313 	x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
314 	x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
315         idiag   -= 9;
316         i2      -= 3;
317       }
318       ierr = PetscLogFlops(9*(a->nz));CHKERRQ(ierr);
319     }
320   } else {
321     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
322   }
323   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
324   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
325   PetscFunctionReturn(0);
326 }
327 
328 #undef __FUNCT__
329 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
330 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
331 {
332   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
333   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
334   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
342   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
343   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
344   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
345   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
346 
347   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
348 
349   diag  = a->diag;
350   idiag = a->idiag;
351   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
352   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
353 
354   if (flag & SOR_ZERO_INITIAL_GUESS) {
355     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
356       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
357       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
358       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
359       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
360       i2     = 4;
361       idiag += 16;
362       for (i=1; i<m; i++) {
363 	v     = aa + 16*ai[i];
364 	vi    = aj + ai[i];
365 	nz    = diag[i] - ai[i];
366 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
367 	while (nz--) {
368 	  idx  = 4*(*vi++);
369 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
370 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
371 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
372 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
373 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
374 	  v   += 16;
375 	}
376 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
377 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
378 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
379 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
380         idiag   += 16;
381         i2      += 4;
382       }
383       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
384       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
385     }
386     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
387         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
388       i2    = 0;
389       mdiag = a->idiag+16*a->mbs;
390       for (i=0; i<m; i++) {
391         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
392         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
393         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
394         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
395         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
396         mdiag  += 16;
397         i2     += 4;
398       }
399       ierr = PetscLogFlops(28*m);CHKERRQ(ierr);
400     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
401       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
402     }
403     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
404       idiag   = a->idiag+16*a->mbs - 16;
405       i2      = 4*m - 4;
406       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
407       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
408       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
409       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
410       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
411       idiag -= 16;
412       i2    -= 4;
413       for (i=m-2; i>=0; i--) {
414 	v     = aa + 16*(diag[i]+1);
415 	vi    = aj + diag[i] + 1;
416 	nz    = ai[i+1] - diag[i] - 1;
417 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
418 	while (nz--) {
419 	  idx  = 4*(*vi++);
420 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
421 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
422 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
423 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
424 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
425 	  v   += 16;
426 	}
427 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
428 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
429 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
430 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
431         idiag   -= 16;
432         i2      -= 4;
433       }
434       ierr = PetscLogFlops(16*(a->nz));CHKERRQ(ierr);
435     }
436   } else {
437     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
438   }
439   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
440   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
441   PetscFunctionReturn(0);
442 }
443 
444 #undef __FUNCT__
445 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
446 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
447 {
448   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
449   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
450   const PetscScalar  *v,*aa = a->a, *b, *idiag,*mdiag;
451   PetscErrorCode     ierr;
452   PetscInt           m = a->mbs,i,i2,nz,idx;
453   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
454 
455   PetscFunctionBegin;
456   its = its*lits;
457   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
458   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
459   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
460   if ((flag & SOR_EISENSTAT) ||(flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER) ) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for Eisenstat trick");
461   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
462 
463   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
464 
465   diag  = a->diag;
466   idiag = a->idiag;
467   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
468   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
469 
470   if (flag & SOR_ZERO_INITIAL_GUESS) {
471     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
472       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
473       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
474       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
475       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
476       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
477       i2     = 5;
478       idiag += 25;
479       for (i=1; i<m; i++) {
480 	v     = aa + 25*ai[i];
481 	vi    = aj + ai[i];
482 	nz    = diag[i] - ai[i];
483 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
484 	while (nz--) {
485 	  idx  = 5*(*vi++);
486 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
487 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
488 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
489 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
490 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
491 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
492 	  v   += 25;
493 	}
494 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
495 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
496 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
497 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
498 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
499         idiag   += 25;
500         i2      += 5;
501       }
502       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
503       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
504     }
505     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
506         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
507       i2    = 0;
508       mdiag = a->idiag+25*a->mbs;
509       for (i=0; i<m; i++) {
510         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
511         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
512         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
513         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
514         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
515         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
516         mdiag  += 25;
517         i2     += 5;
518       }
519       ierr = PetscLogFlops(45*m);CHKERRQ(ierr);
520     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
521       ierr = PetscMemcpy(x,b,A->rmap.N*sizeof(PetscScalar));CHKERRQ(ierr);
522     }
523     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
524       idiag   = a->idiag+25*a->mbs - 25;
525       i2      = 5*m - 5;
526       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
527       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
528       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
529       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
530       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
531       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
532       idiag -= 25;
533       i2    -= 5;
534       for (i=m-2; i>=0; i--) {
535 	v     = aa + 25*(diag[i]+1);
536 	vi    = aj + diag[i] + 1;
537 	nz    = ai[i+1] - diag[i] - 1;
538 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
539 	while (nz--) {
540 	  idx  = 5*(*vi++);
541 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
542 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
543 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
544 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
545 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
546 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
547 	  v   += 25;
548 	}
549 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
550 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
551 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
552 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
553 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
554         idiag   -= 25;
555         i2      -= 5;
556       }
557       ierr = PetscLogFlops(25*(a->nz));CHKERRQ(ierr);
558     }
559   } else {
560     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
561   }
562   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
563   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
564   PetscFunctionReturn(0);
565 }
566 
567 /*
568     Special version for direct calls from Fortran (Used in PETSc-fun3d)
569 */
570 #if defined(PETSC_HAVE_FORTRAN_CAPS)
571 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
572 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
573 #define matsetvaluesblocked4_ matsetvaluesblocked4
574 #endif
575 
576 EXTERN_C_BEGIN
577 #undef __FUNCT__
578 #define __FUNCT__ "matsetvaluesblocked4_"
579 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
580 {
581   Mat               A = *AA;
582   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
583   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
584   PetscInt          *ai=a->i,*ailen=a->ilen;
585   PetscInt          *aj=a->j,stepval,lastcol = -1;
586   const PetscScalar *value = v;
587   MatScalar         *ap,*aa = a->a,*bap;
588 
589   PetscFunctionBegin;
590   if (A->rmap.bs != 4) SETERRABORT(A->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
591   stepval = (n-1)*4;
592   for (k=0; k<m; k++) { /* loop over added rows */
593     row  = im[k];
594     rp   = aj + ai[row];
595     ap   = aa + 16*ai[row];
596     nrow = ailen[row];
597     low  = 0;
598     high = nrow;
599     for (l=0; l<n; l++) { /* loop over added columns */
600       col = in[l];
601       if (col <= lastcol) low = 0; else high = nrow;
602       lastcol = col;
603       value = v + k*(stepval+4 + l)*4;
604       while (high-low > 7) {
605         t = (low+high)/2;
606         if (rp[t] > col) high = t;
607         else             low  = t;
608       }
609       for (i=low; i<high; i++) {
610         if (rp[i] > col) break;
611         if (rp[i] == col) {
612           bap  = ap +  16*i;
613           for (ii=0; ii<4; ii++,value+=stepval) {
614             for (jj=ii; jj<16; jj+=4) {
615               bap[jj] += *value++;
616             }
617           }
618           goto noinsert2;
619         }
620       }
621       N = nrow++ - 1;
622       high++; /* added new column index thus must search to one higher than before */
623       /* shift up all the later entries in this row */
624       for (ii=N; ii>=i; ii--) {
625         rp[ii+1] = rp[ii];
626         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
627       }
628       if (N >= i) {
629         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
630       }
631       rp[i] = col;
632       bap   = ap +  16*i;
633       for (ii=0; ii<4; ii++,value+=stepval) {
634         for (jj=ii; jj<16; jj+=4) {
635           bap[jj] = *value++;
636         }
637       }
638       noinsert2:;
639       low = i;
640     }
641     ailen[row] = nrow;
642   }
643   PetscFunctionReturnVoid();
644 }
645 EXTERN_C_END
646 
647 #if defined(PETSC_HAVE_FORTRAN_CAPS)
648 #define matsetvalues4_ MATSETVALUES4
649 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
650 #define matsetvalues4_ matsetvalues4
651 #endif
652 
653 EXTERN_C_BEGIN
654 #undef __FUNCT__
655 #define __FUNCT__ "MatSetValues4_"
656 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
657 {
658   Mat         A = *AA;
659   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
660   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
661   PetscInt    *ai=a->i,*ailen=a->ilen;
662   PetscInt    *aj=a->j,brow,bcol;
663   PetscInt    ridx,cidx,lastcol = -1;
664   MatScalar   *ap,value,*aa=a->a,*bap;
665 
666   PetscFunctionBegin;
667   for (k=0; k<m; k++) { /* loop over added rows */
668     row  = im[k]; brow = row/4;
669     rp   = aj + ai[brow];
670     ap   = aa + 16*ai[brow];
671     nrow = ailen[brow];
672     low  = 0;
673     high = nrow;
674     for (l=0; l<n; l++) { /* loop over added columns */
675       col = in[l]; bcol = col/4;
676       ridx = row % 4; cidx = col % 4;
677       value = v[l + k*n];
678       if (col <= lastcol) low = 0; else high = nrow;
679       lastcol = col;
680       while (high-low > 7) {
681         t = (low+high)/2;
682         if (rp[t] > bcol) high = t;
683         else              low  = t;
684       }
685       for (i=low; i<high; i++) {
686         if (rp[i] > bcol) break;
687         if (rp[i] == bcol) {
688           bap  = ap +  16*i + 4*cidx + ridx;
689           *bap += value;
690           goto noinsert1;
691         }
692       }
693       N = nrow++ - 1;
694       high++; /* added new column thus must search to one higher than before */
695       /* shift up all the later entries in this row */
696       for (ii=N; ii>=i; ii--) {
697         rp[ii+1] = rp[ii];
698         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
699       }
700       if (N>=i) {
701         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
702       }
703       rp[i]                    = bcol;
704       ap[16*i + 4*cidx + ridx] = value;
705       noinsert1:;
706       low = i;
707     }
708     ailen[brow] = nrow;
709   }
710   PetscFunctionReturnVoid();
711 }
712 EXTERN_C_END
713 
714 /*  UGLY, ugly, ugly
715    When MatScalar == PetscScalar the function MatSetValuesBlocked_SeqBAIJ_MatScalar() does
716    not exist. Otherwise ..._MatScalar() takes matrix dlements in single precision and
717    inserts them into the single precision data structure. The function MatSetValuesBlocked_SeqBAIJ()
718    converts the entries into single precision and then calls ..._MatScalar() to put them
719    into the single precision data structures.
720 */
721 #if defined(PETSC_USE_MAT_SINGLE)
722 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
723 #else
724 #define MatSetValuesBlocked_SeqBAIJ_MatScalar MatSetValuesBlocked_SeqBAIJ
725 #endif
726 
727 #define CHUNKSIZE  10
728 
729 /*
730      Checks for missing diagonals
731 */
732 #undef __FUNCT__
733 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
734 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A)
735 {
736   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
737   PetscErrorCode ierr;
738   PetscInt       *diag,*jj = a->j,i;
739 
740   PetscFunctionBegin;
741   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
742   diag = a->diag;
743   for (i=0; i<a->mbs; i++) {
744     if (jj[diag[i]] != i) {
745       SETERRQ1(PETSC_ERR_PLIB,"Matrix is missing diagonal number %D",i);
746     }
747   }
748   PetscFunctionReturn(0);
749 }
750 
751 #undef __FUNCT__
752 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
753 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
754 {
755   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
756   PetscErrorCode ierr;
757   PetscInt       i,j,m = a->mbs;
758 
759   PetscFunctionBegin;
760   if (!a->diag) {
761     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
762   }
763   for (i=0; i<m; i++) {
764     a->diag[i] = a->i[i+1];
765     for (j=a->i[i]; j<a->i[i+1]; j++) {
766       if (a->j[j] == i) {
767         a->diag[i] = j;
768         break;
769       }
770     }
771   }
772   PetscFunctionReturn(0);
773 }
774 
775 
776 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
777 
778 #undef __FUNCT__
779 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
780 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
781 {
782   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
783   PetscErrorCode ierr;
784   PetscInt       n = a->mbs,i;
785 
786   PetscFunctionBegin;
787   *nn = n;
788   if (!ia) PetscFunctionReturn(0);
789   if (symmetric) {
790     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,oshift,ia,ja);CHKERRQ(ierr);
791   } else if (oshift == 1) {
792     /* temporarily add 1 to i and j indices */
793     PetscInt nz = a->i[n];
794     for (i=0; i<nz; i++) a->j[i]++;
795     for (i=0; i<n+1; i++) a->i[i]++;
796     *ia = a->i; *ja = a->j;
797   } else {
798     *ia = a->i; *ja = a->j;
799   }
800 
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
806 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
807 {
808   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
809   PetscErrorCode ierr;
810   PetscInt       i,n = a->mbs;
811 
812   PetscFunctionBegin;
813   if (!ia) PetscFunctionReturn(0);
814   if (symmetric) {
815     ierr = PetscFree(*ia);CHKERRQ(ierr);
816     ierr = PetscFree(*ja);CHKERRQ(ierr);
817   } else if (oshift == 1) {
818     PetscInt nz = a->i[n]-1;
819     for (i=0; i<nz; i++) a->j[i]--;
820     for (i=0; i<n+1; i++) a->i[i]--;
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_SeqBAIJ"
827 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
828 {
829   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap.N,A->cmap.n,a->nz);
835 #endif
836   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
837   if (a->row) {
838     ierr = ISDestroy(a->row);CHKERRQ(ierr);
839   }
840   if (a->col) {
841     ierr = ISDestroy(a->col);CHKERRQ(ierr);
842   }
843   ierr = PetscFree(a->diag);CHKERRQ(ierr);
844   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
845   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
846   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
847   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
848   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
849   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
850 #if defined(PETSC_USE_MAT_SINGLE)
851   ierr = PetscFree(a->setvaluescopy);CHKERRQ(ierr);
852 #endif
853   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
854   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
855 
856   ierr = PetscFree(a);CHKERRQ(ierr);
857 
858   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
860   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
861   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
862   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
863   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
864   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatSetOption_SeqBAIJ"
870 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op)
871 {
872   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
873   PetscErrorCode ierr;
874 
875   PetscFunctionBegin;
876   switch (op) {
877   case MAT_ROW_ORIENTED:
878     a->roworiented    = PETSC_TRUE;
879     break;
880   case MAT_COLUMN_ORIENTED:
881     a->roworiented    = PETSC_FALSE;
882     break;
883   case MAT_COLUMNS_SORTED:
884     a->sorted         = PETSC_TRUE;
885     break;
886   case MAT_COLUMNS_UNSORTED:
887     a->sorted         = PETSC_FALSE;
888     break;
889   case MAT_KEEP_ZEROED_ROWS:
890     a->keepzeroedrows = PETSC_TRUE;
891     break;
892   case MAT_NO_NEW_NONZERO_LOCATIONS:
893     a->nonew          = 1;
894     break;
895   case MAT_NEW_NONZERO_LOCATION_ERR:
896     a->nonew          = -1;
897     break;
898   case MAT_NEW_NONZERO_ALLOCATION_ERR:
899     a->nonew          = -2;
900     break;
901   case MAT_YES_NEW_NONZERO_LOCATIONS:
902     a->nonew          = 0;
903     break;
904   case MAT_ROWS_SORTED:
905   case MAT_ROWS_UNSORTED:
906   case MAT_YES_NEW_DIAGONALS:
907   case MAT_IGNORE_OFF_PROC_ENTRIES:
908   case MAT_USE_HASH_TABLE:
909     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
910     break;
911   case MAT_NO_NEW_DIAGONALS:
912     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
913   case MAT_SYMMETRIC:
914   case MAT_STRUCTURALLY_SYMMETRIC:
915   case MAT_NOT_SYMMETRIC:
916   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
917   case MAT_HERMITIAN:
918   case MAT_NOT_HERMITIAN:
919   case MAT_SYMMETRY_ETERNAL:
920   case MAT_NOT_SYMMETRY_ETERNAL:
921     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
922     break;
923   default:
924     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
925   }
926   PetscFunctionReturn(0);
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatGetRow_SeqBAIJ"
931 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
932 {
933   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
934   PetscErrorCode ierr;
935   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
936   MatScalar      *aa,*aa_i;
937   PetscScalar    *v_i;
938 
939   PetscFunctionBegin;
940   bs  = A->rmap.bs;
941   ai  = a->i;
942   aj  = a->j;
943   aa  = a->a;
944   bs2 = a->bs2;
945 
946   if (row < 0 || row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
947 
948   bn  = row/bs;   /* Block number */
949   bp  = row % bs; /* Block Position */
950   M   = ai[bn+1] - ai[bn];
951   *nz = bs*M;
952 
953   if (v) {
954     *v = 0;
955     if (*nz) {
956       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
957       for (i=0; i<M; i++) { /* for each block in the block row */
958         v_i  = *v + i*bs;
959         aa_i = aa + bs2*(ai[bn] + i);
960         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
961       }
962     }
963   }
964 
965   if (idx) {
966     *idx = 0;
967     if (*nz) {
968       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
969       for (i=0; i<M; i++) { /* for each block in the block row */
970         idx_i = *idx + i*bs;
971         itmp  = bs*aj[ai[bn] + i];
972         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
973       }
974     }
975   }
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
981 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
982 {
983   PetscErrorCode ierr;
984 
985   PetscFunctionBegin;
986   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
987   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatTranspose_SeqBAIJ"
993 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,Mat *B)
994 {
995   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
996   Mat            C;
997   PetscErrorCode ierr;
998   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap.bs,mbs=a->mbs,nbs=a->nbs,len,*col;
999   PetscInt       *rows,*cols,bs2=a->bs2;
1000   PetscScalar    *array;
1001 
1002   PetscFunctionBegin;
1003   if (!B && mbs!=nbs) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Square matrix only for in-place");
1004   ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1005   ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1006 
1007 #if defined(PETSC_USE_MAT_SINGLE)
1008   ierr = PetscMalloc(a->bs2*a->nz*sizeof(PetscScalar),&array);CHKERRQ(ierr);
1009   for (i=0; i<a->bs2*a->nz; i++) array[i] = (PetscScalar)a->a[i];
1010 #else
1011   array = a->a;
1012 #endif
1013 
1014   for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1015   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
1016   ierr = MatSetSizes(C,A->cmap.n,A->rmap.N,A->cmap.n,A->rmap.N);CHKERRQ(ierr);
1017   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
1018   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1019   ierr = PetscFree(col);CHKERRQ(ierr);
1020   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1021   cols = rows + bs;
1022   for (i=0; i<mbs; i++) {
1023     cols[0] = i*bs;
1024     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1025     len = ai[i+1] - ai[i];
1026     for (j=0; j<len; j++) {
1027       rows[0] = (*aj++)*bs;
1028       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1029       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1030       array += bs2;
1031     }
1032   }
1033   ierr = PetscFree(rows);CHKERRQ(ierr);
1034 #if defined(PETSC_USE_MAT_SINGLE)
1035   ierr = PetscFree(array);
1036 #endif
1037 
1038   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1039   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1040 
1041   if (B) {
1042     *B = C;
1043   } else {
1044     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1045   }
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 #undef __FUNCT__
1050 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1051 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1052 {
1053   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1054   PetscErrorCode ierr;
1055   PetscInt       i,*col_lens,bs = A->rmap.bs,count,*jj,j,k,l,bs2=a->bs2;
1056   int            fd;
1057   PetscScalar    *aa;
1058   FILE           *file;
1059 
1060   PetscFunctionBegin;
1061   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1062   ierr        = PetscMalloc((4+A->rmap.N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1063   col_lens[0] = MAT_FILE_COOKIE;
1064 
1065   col_lens[1] = A->rmap.N;
1066   col_lens[2] = A->cmap.n;
1067   col_lens[3] = a->nz*bs2;
1068 
1069   /* store lengths of each row and write (including header) to file */
1070   count = 0;
1071   for (i=0; i<a->mbs; i++) {
1072     for (j=0; j<bs; j++) {
1073       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1074     }
1075   }
1076   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap.N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1077   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1078 
1079   /* store column indices (zero start index) */
1080   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1081   count = 0;
1082   for (i=0; i<a->mbs; i++) {
1083     for (j=0; j<bs; j++) {
1084       for (k=a->i[i]; k<a->i[i+1]; k++) {
1085         for (l=0; l<bs; l++) {
1086           jj[count++] = bs*a->j[k] + l;
1087         }
1088       }
1089     }
1090   }
1091   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1092   ierr = PetscFree(jj);CHKERRQ(ierr);
1093 
1094   /* store nonzero values */
1095   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1096   count = 0;
1097   for (i=0; i<a->mbs; i++) {
1098     for (j=0; j<bs; j++) {
1099       for (k=a->i[i]; k<a->i[i+1]; k++) {
1100         for (l=0; l<bs; l++) {
1101           aa[count++] = a->a[bs2*k + l*bs + j];
1102         }
1103       }
1104     }
1105   }
1106   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1107   ierr = PetscFree(aa);CHKERRQ(ierr);
1108 
1109   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1110   if (file) {
1111     fprintf(file,"-matload_block_size %d\n",(int)A->rmap.bs);
1112   }
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1118 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1119 {
1120   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1121   PetscErrorCode    ierr;
1122   PetscInt          i,j,bs = A->rmap.bs,k,l,bs2=a->bs2;
1123   PetscViewerFormat format;
1124 
1125   PetscFunctionBegin;
1126   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1127   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1128     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1129   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1130     Mat aij;
1131     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1132     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1133     ierr = MatDestroy(aij);CHKERRQ(ierr);
1134   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1135      PetscFunctionReturn(0);
1136   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1137     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1138     for (i=0; i<a->mbs; i++) {
1139       for (j=0; j<bs; j++) {
1140         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1141         for (k=a->i[i]; k<a->i[i+1]; k++) {
1142           for (l=0; l<bs; l++) {
1143 #if defined(PETSC_USE_COMPLEX)
1144             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1145               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1146                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1147             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1148               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1149                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1150             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1151               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1152             }
1153 #else
1154             if (a->a[bs2*k + l*bs + j] != 0.0) {
1155               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1156             }
1157 #endif
1158           }
1159         }
1160         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1161       }
1162     }
1163     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1164   } else {
1165     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1166     for (i=0; i<a->mbs; i++) {
1167       for (j=0; j<bs; j++) {
1168         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1169         for (k=a->i[i]; k<a->i[i+1]; k++) {
1170           for (l=0; l<bs; l++) {
1171 #if defined(PETSC_USE_COMPLEX)
1172             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1173               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1174                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1175             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1176               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1177                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1178             } else {
1179               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1180             }
1181 #else
1182             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1183 #endif
1184           }
1185         }
1186         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1187       }
1188     }
1189     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1190   }
1191   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1197 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1198 {
1199   Mat            A = (Mat) Aa;
1200   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1201   PetscErrorCode ierr;
1202   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap.bs,bs2=a->bs2;
1203   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1204   MatScalar      *aa;
1205   PetscViewer    viewer;
1206 
1207   PetscFunctionBegin;
1208 
1209   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1210   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1211 
1212   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1213 
1214   /* loop over matrix elements drawing boxes */
1215   color = PETSC_DRAW_BLUE;
1216   for (i=0,row=0; i<mbs; i++,row+=bs) {
1217     for (j=a->i[i]; j<a->i[i+1]; j++) {
1218       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1219       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1220       aa = a->a + j*bs2;
1221       for (k=0; k<bs; k++) {
1222         for (l=0; l<bs; l++) {
1223           if (PetscRealPart(*aa++) >=  0.) continue;
1224           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1225         }
1226       }
1227     }
1228   }
1229   color = PETSC_DRAW_CYAN;
1230   for (i=0,row=0; i<mbs; i++,row+=bs) {
1231     for (j=a->i[i]; j<a->i[i+1]; j++) {
1232       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1233       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1234       aa = a->a + j*bs2;
1235       for (k=0; k<bs; k++) {
1236         for (l=0; l<bs; l++) {
1237           if (PetscRealPart(*aa++) != 0.) continue;
1238           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1239         }
1240       }
1241     }
1242   }
1243 
1244   color = PETSC_DRAW_RED;
1245   for (i=0,row=0; i<mbs; i++,row+=bs) {
1246     for (j=a->i[i]; j<a->i[i+1]; j++) {
1247       y_l = A->rmap.N - row - 1.0; y_r = y_l + 1.0;
1248       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1249       aa = a->a + j*bs2;
1250       for (k=0; k<bs; k++) {
1251         for (l=0; l<bs; l++) {
1252           if (PetscRealPart(*aa++) <= 0.) continue;
1253           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1254         }
1255       }
1256     }
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1263 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1264 {
1265   PetscErrorCode ierr;
1266   PetscReal      xl,yl,xr,yr,w,h;
1267   PetscDraw      draw;
1268   PetscTruth     isnull;
1269 
1270   PetscFunctionBegin;
1271 
1272   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1273   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1274 
1275   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1276   xr  = A->cmap.n; yr = A->rmap.N; h = yr/10.0; w = xr/10.0;
1277   xr += w;    yr += h;  xl = -w;     yl = -h;
1278   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1279   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1280   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatView_SeqBAIJ"
1286 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1287 {
1288   PetscErrorCode ierr;
1289   PetscTruth     iascii,isbinary,isdraw;
1290 
1291   PetscFunctionBegin;
1292   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1293   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1294   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1295   if (iascii){
1296     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1297   } else if (isbinary) {
1298     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1299   } else if (isdraw) {
1300     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1301   } else {
1302     Mat B;
1303     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1304     ierr = MatView(B,viewer);CHKERRQ(ierr);
1305     ierr = MatDestroy(B);CHKERRQ(ierr);
1306   }
1307   PetscFunctionReturn(0);
1308 }
1309 
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1313 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1314 {
1315   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1316   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1317   PetscInt    *ai = a->i,*ailen = a->ilen;
1318   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap.bs,bs2=a->bs2;
1319   MatScalar   *ap,*aa = a->a,zero = 0.0;
1320 
1321   PetscFunctionBegin;
1322   for (k=0; k<m; k++) { /* loop over rows */
1323     row  = im[k]; brow = row/bs;
1324     if (row < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
1325     if (row >= A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1326     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1327     nrow = ailen[brow];
1328     for (l=0; l<n; l++) { /* loop over columns */
1329       if (in[l] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
1330       if (in[l] >= A->cmap.n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1331       col  = in[l] ;
1332       bcol = col/bs;
1333       cidx = col%bs;
1334       ridx = row%bs;
1335       high = nrow;
1336       low  = 0; /* assume unsorted */
1337       while (high-low > 5) {
1338         t = (low+high)/2;
1339         if (rp[t] > bcol) high = t;
1340         else             low  = t;
1341       }
1342       for (i=low; i<high; i++) {
1343         if (rp[i] > bcol) break;
1344         if (rp[i] == bcol) {
1345           *v++ = ap[bs2*i+bs*cidx+ridx];
1346           goto finished;
1347         }
1348       }
1349       *v++ = zero;
1350       finished:;
1351     }
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #if defined(PETSC_USE_MAT_SINGLE)
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1359 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
1360 {
1361   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)mat->data;
1362   PetscErrorCode ierr;
1363   PetscInt       i,N = m*n*b->bs2;
1364   MatScalar      *vsingle;
1365 
1366   PetscFunctionBegin;
1367   if (N > b->setvalueslen) {
1368     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
1369     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
1370     b->setvalueslen  = N;
1371   }
1372   vsingle = b->setvaluescopy;
1373   for (i=0; i<N; i++) {
1374     vsingle[i] = v[i];
1375   }
1376   ierr = MatSetValuesBlocked_SeqBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 #endif
1380 
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1384 PetscErrorCode MatSetValuesBlocked_SeqBAIJ_MatScalar(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode is)
1385 {
1386   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
1387   PetscInt        *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1388   PetscInt        *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1389   PetscErrorCode  ierr;
1390   PetscInt        *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap.bs,stepval;
1391   PetscTruth      roworiented=a->roworiented;
1392   const MatScalar *value = v;
1393   MatScalar       *ap,*aa = a->a,*bap;
1394 
1395   PetscFunctionBegin;
1396   if (roworiented) {
1397     stepval = (n-1)*bs;
1398   } else {
1399     stepval = (m-1)*bs;
1400   }
1401   for (k=0; k<m; k++) { /* loop over added rows */
1402     row  = im[k];
1403     if (row < 0) continue;
1404 #if defined(PETSC_USE_DEBUG)
1405     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1406 #endif
1407     rp   = aj + ai[row];
1408     ap   = aa + bs2*ai[row];
1409     rmax = imax[row];
1410     nrow = ailen[row];
1411     low  = 0;
1412     high = nrow;
1413     for (l=0; l<n; l++) { /* loop over added columns */
1414       if (in[l] < 0) continue;
1415 #if defined(PETSC_USE_DEBUG)
1416       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1417 #endif
1418       col = in[l];
1419       if (roworiented) {
1420         value = v + k*(stepval+bs)*bs + l*bs;
1421       } else {
1422         value = v + l*(stepval+bs)*bs + k*bs;
1423       }
1424       if (col <= lastcol) low = 0; else high = nrow;
1425       lastcol = col;
1426       while (high-low > 7) {
1427         t = (low+high)/2;
1428         if (rp[t] > col) high = t;
1429         else             low  = t;
1430       }
1431       for (i=low; i<high; i++) {
1432         if (rp[i] > col) break;
1433         if (rp[i] == col) {
1434           bap  = ap +  bs2*i;
1435           if (roworiented) {
1436             if (is == ADD_VALUES) {
1437               for (ii=0; ii<bs; ii++,value+=stepval) {
1438                 for (jj=ii; jj<bs2; jj+=bs) {
1439                   bap[jj] += *value++;
1440                 }
1441               }
1442             } else {
1443               for (ii=0; ii<bs; ii++,value+=stepval) {
1444                 for (jj=ii; jj<bs2; jj+=bs) {
1445                   bap[jj] = *value++;
1446                 }
1447               }
1448             }
1449           } else {
1450             if (is == ADD_VALUES) {
1451               for (ii=0; ii<bs; ii++,value+=stepval) {
1452                 for (jj=0; jj<bs; jj++) {
1453                   *bap++ += *value++;
1454                 }
1455               }
1456             } else {
1457               for (ii=0; ii<bs; ii++,value+=stepval) {
1458                 for (jj=0; jj<bs; jj++) {
1459                   *bap++  = *value++;
1460                 }
1461               }
1462             }
1463           }
1464           goto noinsert2;
1465         }
1466       }
1467       if (nonew == 1) goto noinsert2;
1468       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1469       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1470       N = nrow++ - 1; high++;
1471       /* shift up all the later entries in this row */
1472       for (ii=N; ii>=i; ii--) {
1473         rp[ii+1] = rp[ii];
1474         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1475       }
1476       if (N >= i) {
1477         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1478       }
1479       rp[i] = col;
1480       bap   = ap +  bs2*i;
1481       if (roworiented) {
1482         for (ii=0; ii<bs; ii++,value+=stepval) {
1483           for (jj=ii; jj<bs2; jj+=bs) {
1484             bap[jj] = *value++;
1485           }
1486         }
1487       } else {
1488         for (ii=0; ii<bs; ii++,value+=stepval) {
1489           for (jj=0; jj<bs; jj++) {
1490             *bap++  = *value++;
1491           }
1492         }
1493       }
1494       noinsert2:;
1495       low = i;
1496     }
1497     ailen[row] = nrow;
1498   }
1499   PetscFunctionReturn(0);
1500 }
1501 
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1504 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1505 {
1506   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1507   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1508   PetscInt       m = A->rmap.N,*ip,N,*ailen = a->ilen;
1509   PetscErrorCode ierr;
1510   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1511   MatScalar      *aa = a->a,*ap;
1512   PetscReal      ratio=0.6;
1513 
1514   PetscFunctionBegin;
1515   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1516 
1517   if (m) rmax = ailen[0];
1518   for (i=1; i<mbs; i++) {
1519     /* move each row back by the amount of empty slots (fshift) before it*/
1520     fshift += imax[i-1] - ailen[i-1];
1521     rmax   = PetscMax(rmax,ailen[i]);
1522     if (fshift) {
1523       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1524       N = ailen[i];
1525       for (j=0; j<N; j++) {
1526         ip[j-fshift] = ip[j];
1527         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1528       }
1529     }
1530     ai[i] = ai[i-1] + ailen[i-1];
1531   }
1532   if (mbs) {
1533     fshift += imax[mbs-1] - ailen[mbs-1];
1534     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1535   }
1536   /* reset ilen and imax for each row */
1537   for (i=0; i<mbs; i++) {
1538     ailen[i] = imax[i] = ai[i+1] - ai[i];
1539   }
1540   a->nz = ai[mbs];
1541 
1542   /* diagonals may have moved, so kill the diagonal pointers */
1543   a->idiagvalid = PETSC_FALSE;
1544   if (fshift && a->diag) {
1545     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1546     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1547     a->diag = 0;
1548   }
1549   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap.n,A->rmap.bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1550   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1551   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1552   a->reallocs          = 0;
1553   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1554 
1555   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1556   if (a->compressedrow.use){
1557     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1558   }
1559 
1560   A->same_nonzero = PETSC_TRUE;
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 /*
1565    This function returns an array of flags which indicate the locations of contiguous
1566    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1567    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1568    Assume: sizes should be long enough to hold all the values.
1569 */
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1572 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1573 {
1574   PetscInt   i,j,k,row;
1575   PetscTruth flg;
1576 
1577   PetscFunctionBegin;
1578   for (i=0,j=0; i<n; j++) {
1579     row = idx[i];
1580     if (row%bs!=0) { /* Not the begining of a block */
1581       sizes[j] = 1;
1582       i++;
1583     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1584       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1585       i++;
1586     } else { /* Begining of the block, so check if the complete block exists */
1587       flg = PETSC_TRUE;
1588       for (k=1; k<bs; k++) {
1589         if (row+k != idx[i+k]) { /* break in the block */
1590           flg = PETSC_FALSE;
1591           break;
1592         }
1593       }
1594       if (flg) { /* No break in the bs */
1595         sizes[j] = bs;
1596         i+= bs;
1597       } else {
1598         sizes[j] = 1;
1599         i++;
1600       }
1601     }
1602   }
1603   *bs_max = j;
1604   PetscFunctionReturn(0);
1605 }
1606 
1607 #undef __FUNCT__
1608 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1609 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1610 {
1611   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1612   PetscErrorCode ierr;
1613   PetscInt       i,j,k,count,*rows;
1614   PetscInt       bs=A->rmap.bs,bs2=baij->bs2,*sizes,row,bs_max;
1615   PetscScalar    zero = 0.0;
1616   MatScalar      *aa;
1617 
1618   PetscFunctionBegin;
1619   /* Make a copy of the IS and  sort it */
1620   /* allocate memory for rows,sizes */
1621   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1622   sizes = rows + is_n;
1623 
1624   /* copy IS values to rows, and sort them */
1625   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
1626   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
1627   if (baij->keepzeroedrows) {
1628     for (i=0; i<is_n; i++) { sizes[i] = 1; }
1629     bs_max = is_n;
1630     A->same_nonzero = PETSC_TRUE;
1631   } else {
1632     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
1633     A->same_nonzero = PETSC_FALSE;
1634   }
1635 
1636   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
1637     row   = rows[j];
1638     if (row < 0 || row > A->rmap.N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
1639     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
1640     aa    = baij->a + baij->i[row/bs]*bs2 + (row%bs);
1641     if (sizes[i] == bs && !baij->keepzeroedrows) {
1642       if (diag != 0.0) {
1643         if (baij->ilen[row/bs] > 0) {
1644           baij->ilen[row/bs]       = 1;
1645           baij->j[baij->i[row/bs]] = row/bs;
1646           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
1647         }
1648         /* Now insert all the diagonal values for this bs */
1649         for (k=0; k<bs; k++) {
1650           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
1651         }
1652       } else { /* (diag == 0.0) */
1653         baij->ilen[row/bs] = 0;
1654       } /* end (diag == 0.0) */
1655     } else { /* (sizes[i] != bs) */
1656 #if defined (PETSC_USE_DEBUG)
1657       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
1658 #endif
1659       for (k=0; k<count; k++) {
1660         aa[0] =  zero;
1661         aa    += bs;
1662       }
1663       if (diag != 0.0) {
1664         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
1665       }
1666     }
1667   }
1668 
1669   ierr = PetscFree(rows);CHKERRQ(ierr);
1670   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1671   PetscFunctionReturn(0);
1672 }
1673 
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatSetValues_SeqBAIJ"
1676 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1677 {
1678   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1679   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
1680   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1681   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap.bs,brow,bcol;
1682   PetscErrorCode ierr;
1683   PetscInt       ridx,cidx,bs2=a->bs2;
1684   PetscTruth     roworiented=a->roworiented;
1685   MatScalar      *ap,value,*aa=a->a,*bap;
1686 
1687   PetscFunctionBegin;
1688   for (k=0; k<m; k++) { /* loop over added rows */
1689     row  = im[k];
1690     brow = row/bs;
1691     if (row < 0) continue;
1692 #if defined(PETSC_USE_DEBUG)
1693     if (row >= A->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap.N-1);
1694 #endif
1695     rp   = aj + ai[brow];
1696     ap   = aa + bs2*ai[brow];
1697     rmax = imax[brow];
1698     nrow = ailen[brow];
1699     low  = 0;
1700     high = nrow;
1701     for (l=0; l<n; l++) { /* loop over added columns */
1702       if (in[l] < 0) continue;
1703 #if defined(PETSC_USE_DEBUG)
1704       if (in[l] >= A->cmap.n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap.n-1);
1705 #endif
1706       col = in[l]; bcol = col/bs;
1707       ridx = row % bs; cidx = col % bs;
1708       if (roworiented) {
1709         value = v[l + k*n];
1710       } else {
1711         value = v[k + l*m];
1712       }
1713       if (col <= lastcol) low = 0; else high = nrow;
1714       lastcol = col;
1715       while (high-low > 7) {
1716         t = (low+high)/2;
1717         if (rp[t] > bcol) high = t;
1718         else              low  = t;
1719       }
1720       for (i=low; i<high; i++) {
1721         if (rp[i] > bcol) break;
1722         if (rp[i] == bcol) {
1723           bap  = ap +  bs2*i + bs*cidx + ridx;
1724           if (is == ADD_VALUES) *bap += value;
1725           else                  *bap  = value;
1726           goto noinsert1;
1727         }
1728       }
1729       if (nonew == 1) goto noinsert1;
1730       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1731       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1732       N = nrow++ - 1; high++;
1733       /* shift up all the later entries in this row */
1734       for (ii=N; ii>=i; ii--) {
1735         rp[ii+1] = rp[ii];
1736         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1737       }
1738       if (N>=i) {
1739         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1740       }
1741       rp[i]                      = bcol;
1742       ap[bs2*i + bs*cidx + ridx] = value;
1743       a->nz++;
1744       noinsert1:;
1745       low = i;
1746     }
1747     ailen[brow] = nrow;
1748   }
1749   A->same_nonzero = PETSC_FALSE;
1750   PetscFunctionReturn(0);
1751 }
1752 
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
1756 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,MatFactorInfo *info)
1757 {
1758   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
1759   Mat            outA;
1760   PetscErrorCode ierr;
1761   PetscTruth     row_identity,col_identity;
1762 
1763   PetscFunctionBegin;
1764   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
1765   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
1766   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
1767   if (!row_identity || !col_identity) {
1768     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
1769   }
1770 
1771   outA          = inA;
1772   inA->factor   = FACTOR_LU;
1773 
1774   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
1775 
1776   a->row        = row;
1777   a->col        = col;
1778   ierr          = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
1779   ierr          = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
1780 
1781   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
1782   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
1783   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
1784 
1785   /*
1786       Blocksize 2, 3, 4, 5, 6 and 7 have a special faster factorization/solver
1787       for ILU(0) factorization with natural ordering
1788   */
1789   if (inA->rmap.bs < 8) {
1790     ierr = MatSeqBAIJ_UpdateFactorNumeric_NaturalOrdering(inA);CHKERRQ(ierr);
1791   } else {
1792     if (!a->solve_work) {
1793       ierr = PetscMalloc((inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
1794       ierr = PetscLogObjectMemory(inA,(inA->rmap.N+inA->rmap.bs)*sizeof(PetscScalar));CHKERRQ(ierr);
1795     }
1796   }
1797 
1798   ierr = MatLUFactorNumeric(inA,info,&outA);CHKERRQ(ierr);
1799 
1800   PetscFunctionReturn(0);
1801 }
1802 
1803 EXTERN_C_BEGIN
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
1806 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
1807 {
1808   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
1809   PetscInt    i,nz,nbs;
1810 
1811   PetscFunctionBegin;
1812   nz  = baij->maxnz/baij->bs2;
1813   nbs = baij->nbs;
1814   for (i=0; i<nz; i++) {
1815     baij->j[i] = indices[i];
1816   }
1817   baij->nz = nz;
1818   for (i=0; i<nbs; i++) {
1819     baij->ilen[i] = baij->imax[i];
1820   }
1821 
1822   PetscFunctionReturn(0);
1823 }
1824 EXTERN_C_END
1825 
1826 #undef __FUNCT__
1827 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
1828 /*@
1829     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
1830        in the matrix.
1831 
1832   Input Parameters:
1833 +  mat - the SeqBAIJ matrix
1834 -  indices - the column indices
1835 
1836   Level: advanced
1837 
1838   Notes:
1839     This can be called if you have precomputed the nonzero structure of the
1840   matrix and want to provide it to the matrix object to improve the performance
1841   of the MatSetValues() operation.
1842 
1843     You MUST have set the correct numbers of nonzeros per row in the call to
1844   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
1845 
1846     MUST be called before any calls to MatSetValues();
1847 
1848 @*/
1849 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
1850 {
1851   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
1852 
1853   PetscFunctionBegin;
1854   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
1855   PetscValidPointer(indices,2);
1856   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
1857   if (f) {
1858     ierr = (*f)(mat,indices);CHKERRQ(ierr);
1859   } else {
1860     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
1861   }
1862   PetscFunctionReturn(0);
1863 }
1864 
1865 #undef __FUNCT__
1866 #define __FUNCT__ "MatGetRowMax_SeqBAIJ"
1867 PetscErrorCode MatGetRowMax_SeqBAIJ(Mat A,Vec v)
1868 {
1869   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1870   PetscErrorCode ierr;
1871   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
1872   PetscReal      atmp;
1873   PetscScalar    *x,zero = 0.0;
1874   MatScalar      *aa;
1875   PetscInt       ncols,brow,krow,kcol;
1876 
1877   PetscFunctionBegin;
1878   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
1879   bs   = A->rmap.bs;
1880   aa   = a->a;
1881   ai   = a->i;
1882   aj   = a->j;
1883   mbs = a->mbs;
1884 
1885   ierr = VecSet(v,zero);CHKERRQ(ierr);
1886   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
1887   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
1888   if (n != A->rmap.N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
1889   for (i=0; i<mbs; i++) {
1890     ncols = ai[1] - ai[0]; ai++;
1891     brow  = bs*i;
1892     for (j=0; j<ncols; j++){
1893       /* bcol = bs*(*aj); */
1894       for (kcol=0; kcol<bs; kcol++){
1895         for (krow=0; krow<bs; krow++){
1896           atmp = PetscAbsScalar(*aa); aa++;
1897           row = brow + krow;    /* row index */
1898           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
1899           if (PetscAbsScalar(x[row]) < atmp) x[row] = atmp;
1900         }
1901       }
1902       aj++;
1903     }
1904   }
1905   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
1906   PetscFunctionReturn(0);
1907 }
1908 
1909 #undef __FUNCT__
1910 #define __FUNCT__ "MatCopy_SeqBAIJ"
1911 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
1912 {
1913   PetscErrorCode ierr;
1914 
1915   PetscFunctionBegin;
1916   /* If the two matrices have the same copy implementation, use fast copy. */
1917   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
1918     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1919     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
1920 
1921     if (a->i[A->rmap.N] != b->i[B->rmap.N]) {
1922       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
1923     }
1924     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap.N])*sizeof(PetscScalar));CHKERRQ(ierr);
1925   } else {
1926     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1927   }
1928   PetscFunctionReturn(0);
1929 }
1930 
1931 #undef __FUNCT__
1932 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
1933 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
1934 {
1935   PetscErrorCode ierr;
1936 
1937   PetscFunctionBegin;
1938   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 #undef __FUNCT__
1943 #define __FUNCT__ "MatGetArray_SeqBAIJ"
1944 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
1945 {
1946   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1947   PetscFunctionBegin;
1948   *array = a->a;
1949   PetscFunctionReturn(0);
1950 }
1951 
1952 #undef __FUNCT__
1953 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
1954 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
1955 {
1956   PetscFunctionBegin;
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 #include "petscblaslapack.h"
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatAXPY_SeqBAIJ"
1963 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1964 {
1965   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
1966   PetscErrorCode ierr;
1967   PetscInt       i,bs=Y->rmap.bs,j,bs2;
1968   PetscBLASInt   one=1,bnz = (PetscBLASInt)x->nz;
1969 
1970   PetscFunctionBegin;
1971   if (str == SAME_NONZERO_PATTERN) {
1972     PetscScalar alpha = a;
1973     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
1974   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
1975     if (y->xtoy && y->XtoY != X) {
1976       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
1977       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
1978     }
1979     if (!y->xtoy) { /* get xtoy */
1980       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
1981       y->XtoY = X;
1982     }
1983     bs2 = bs*bs;
1984     for (i=0; i<x->nz; i++) {
1985       j = 0;
1986       while (j < bs2){
1987         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
1988         j++;
1989       }
1990     }
1991     ierr = PetscInfo3(0,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
1992   } else {
1993     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1994   }
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2000 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2001 {
2002   Mat_SeqBAIJ     *a = (Mat_SeqBAIJ*)A->data;
2003   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2004   PetscScalar    *aa = a->a;
2005 
2006   PetscFunctionBegin;
2007   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 #undef __FUNCT__
2012 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2013 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2014 {
2015   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2016   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2017   PetscScalar    *aa = a->a;
2018 
2019   PetscFunctionBegin;
2020   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2021   PetscFunctionReturn(0);
2022 }
2023 
2024 
2025 /* -------------------------------------------------------------------*/
2026 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2027        MatGetRow_SeqBAIJ,
2028        MatRestoreRow_SeqBAIJ,
2029        MatMult_SeqBAIJ_N,
2030 /* 4*/ MatMultAdd_SeqBAIJ_N,
2031        MatMultTranspose_SeqBAIJ,
2032        MatMultTransposeAdd_SeqBAIJ,
2033        MatSolve_SeqBAIJ_N,
2034        0,
2035        0,
2036 /*10*/ 0,
2037        MatLUFactor_SeqBAIJ,
2038        0,
2039        0,
2040        MatTranspose_SeqBAIJ,
2041 /*15*/ MatGetInfo_SeqBAIJ,
2042        MatEqual_SeqBAIJ,
2043        MatGetDiagonal_SeqBAIJ,
2044        MatDiagonalScale_SeqBAIJ,
2045        MatNorm_SeqBAIJ,
2046 /*20*/ 0,
2047        MatAssemblyEnd_SeqBAIJ,
2048        0,
2049        MatSetOption_SeqBAIJ,
2050        MatZeroEntries_SeqBAIJ,
2051 /*25*/ MatZeroRows_SeqBAIJ,
2052        MatLUFactorSymbolic_SeqBAIJ,
2053        MatLUFactorNumeric_SeqBAIJ_N,
2054        MatCholeskyFactorSymbolic_SeqBAIJ,
2055        MatCholeskyFactorNumeric_SeqBAIJ_N,
2056 /*30*/ MatSetUpPreallocation_SeqBAIJ,
2057        MatILUFactorSymbolic_SeqBAIJ,
2058        MatICCFactorSymbolic_SeqBAIJ,
2059        MatGetArray_SeqBAIJ,
2060        MatRestoreArray_SeqBAIJ,
2061 /*35*/ MatDuplicate_SeqBAIJ,
2062        0,
2063        0,
2064        MatILUFactor_SeqBAIJ,
2065        0,
2066 /*40*/ MatAXPY_SeqBAIJ,
2067        MatGetSubMatrices_SeqBAIJ,
2068        MatIncreaseOverlap_SeqBAIJ,
2069        MatGetValues_SeqBAIJ,
2070        MatCopy_SeqBAIJ,
2071 /*45*/ 0,
2072        MatScale_SeqBAIJ,
2073        0,
2074        0,
2075        0,
2076 /*50*/ 0,
2077        MatGetRowIJ_SeqBAIJ,
2078        MatRestoreRowIJ_SeqBAIJ,
2079        0,
2080        0,
2081 /*55*/ 0,
2082        0,
2083        0,
2084        0,
2085        MatSetValuesBlocked_SeqBAIJ,
2086 /*60*/ MatGetSubMatrix_SeqBAIJ,
2087        MatDestroy_SeqBAIJ,
2088        MatView_SeqBAIJ,
2089        0,
2090        0,
2091 /*65*/ 0,
2092        0,
2093        0,
2094        0,
2095        0,
2096 /*70*/ MatGetRowMax_SeqBAIJ,
2097        MatConvert_Basic,
2098        0,
2099        0,
2100        0,
2101 /*75*/ 0,
2102        0,
2103        0,
2104        0,
2105        0,
2106 /*80*/ 0,
2107        0,
2108        0,
2109        0,
2110        MatLoad_SeqBAIJ,
2111 /*85*/ 0,
2112        0,
2113        0,
2114        0,
2115        0,
2116 /*90*/ 0,
2117        0,
2118        0,
2119        0,
2120        0,
2121 /*95*/ 0,
2122        0,
2123        0,
2124        0,
2125        0,
2126 /*100*/0,
2127        0,
2128        0,
2129        0,
2130        0,
2131 /*105*/0,
2132        MatRealPart_SeqBAIJ,
2133        MatImaginaryPart_SeqBAIJ
2134 };
2135 
2136 EXTERN_C_BEGIN
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2139 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2140 {
2141   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2142   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2143   PetscErrorCode ierr;
2144 
2145   PetscFunctionBegin;
2146   if (aij->nonew != 1) {
2147     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2148   }
2149 
2150   /* allocate space for values if not already there */
2151   if (!aij->saved_values) {
2152     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2153   }
2154 
2155   /* copy values over */
2156   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2157   PetscFunctionReturn(0);
2158 }
2159 EXTERN_C_END
2160 
2161 EXTERN_C_BEGIN
2162 #undef __FUNCT__
2163 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2164 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2165 {
2166   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2167   PetscErrorCode ierr;
2168   PetscInt       nz = aij->i[mat->rmap.N]*mat->rmap.bs*aij->bs2;
2169 
2170   PetscFunctionBegin;
2171   if (aij->nonew != 1) {
2172     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NO_NEW_NONZERO_LOCATIONS);first");
2173   }
2174   if (!aij->saved_values) {
2175     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2176   }
2177 
2178   /* copy values over */
2179   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2180   PetscFunctionReturn(0);
2181 }
2182 EXTERN_C_END
2183 
2184 EXTERN_C_BEGIN
2185 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2186 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2187 EXTERN_C_END
2188 
2189 EXTERN_C_BEGIN
2190 #undef __FUNCT__
2191 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2192 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2193 {
2194   Mat_SeqBAIJ    *b;
2195   PetscErrorCode ierr;
2196   PetscInt       i,mbs,nbs,bs2,newbs = bs;
2197   PetscTruth     flg,skipallocation = PETSC_FALSE;
2198 
2199   PetscFunctionBegin;
2200 
2201   if (nz == MAT_SKIP_ALLOCATION) {
2202     skipallocation = PETSC_TRUE;
2203     nz             = 0;
2204   }
2205 
2206   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2207     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",bs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2208   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2209 
2210   if (nnz && newbs != bs) {
2211     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2212   }
2213   bs   = newbs;
2214 
2215   B->rmap.bs = B->cmap.bs = bs;
2216   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
2217   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
2218 
2219   B->preallocated = PETSC_TRUE;
2220 
2221   mbs  = B->rmap.n/bs;
2222   nbs  = B->cmap.n/bs;
2223   bs2  = bs*bs;
2224 
2225   if (mbs*bs!=B->rmap.n || nbs*bs!=B->cmap.n) {
2226     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap.N,B->cmap.n,bs);
2227   }
2228 
2229   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2230   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2231   if (nnz) {
2232     for (i=0; i<mbs; i++) {
2233       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2234       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2235     }
2236   }
2237 
2238   b       = (Mat_SeqBAIJ*)B->data;
2239   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2240     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2241   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2242 
2243   B->ops->solve               = MatSolve_SeqBAIJ_Update;
2244   B->ops->solvetranspose      = MatSolveTranspose_SeqBAIJ_Update;
2245   if (!flg) {
2246     switch (bs) {
2247     case 1:
2248       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
2249       B->ops->mult            = MatMult_SeqBAIJ_1;
2250       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2251       break;
2252     case 2:
2253       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
2254       B->ops->mult            = MatMult_SeqBAIJ_2;
2255       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2256       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2257       break;
2258     case 3:
2259       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
2260       B->ops->mult            = MatMult_SeqBAIJ_3;
2261       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2262       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2263       break;
2264     case 4:
2265       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
2266       B->ops->mult            = MatMult_SeqBAIJ_4;
2267       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2268       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2269       break;
2270     case 5:
2271       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
2272       B->ops->mult            = MatMult_SeqBAIJ_5;
2273       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2274       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2275       break;
2276     case 6:
2277       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
2278       B->ops->mult            = MatMult_SeqBAIJ_6;
2279       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2280       break;
2281     case 7:
2282       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
2283       B->ops->mult            = MatMult_SeqBAIJ_7;
2284       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2285       break;
2286     default:
2287       B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
2288       B->ops->mult            = MatMult_SeqBAIJ_N;
2289       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2290       break;
2291     }
2292   }
2293   B->rmap.bs      = bs;
2294   b->mbs     = mbs;
2295   b->nbs     = nbs;
2296   if (!skipallocation) {
2297     ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2298     /* b->ilen will count nonzeros in each block row so far. */
2299     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2300     if (!nnz) {
2301       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2302       else if (nz <= 0)        nz = 1;
2303       for (i=0; i<mbs; i++) b->imax[i] = nz;
2304       nz = nz*mbs;
2305     } else {
2306       nz = 0;
2307       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2308     }
2309 
2310     /* allocate the matrix space */
2311     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap.N+1,PetscInt,&b->i);CHKERRQ(ierr);
2312     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2313     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2314     b->singlemalloc = PETSC_TRUE;
2315 
2316     b->i[0] = 0;
2317     for (i=1; i<mbs+1; i++) {
2318       b->i[i] = b->i[i-1] + b->imax[i-1];
2319     }
2320     b->free_a     = PETSC_TRUE;
2321     b->free_ij    = PETSC_TRUE;
2322   } else {
2323     b->free_a     = PETSC_FALSE;
2324     b->free_ij    = PETSC_FALSE;
2325   }
2326 
2327   B->rmap.bs          = bs;
2328   b->bs2              = bs2;
2329   b->mbs              = mbs;
2330   b->nz               = 0;
2331   b->maxnz            = nz*bs2;
2332   B->info.nz_unneeded = (PetscReal)b->maxnz;
2333   PetscFunctionReturn(0);
2334 }
2335 EXTERN_C_END
2336 
2337 /*MC
2338    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2339    block sparse compressed row format.
2340 
2341    Options Database Keys:
2342 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2343 
2344   Level: beginner
2345 
2346 .seealso: MatCreateSeqBAIJ()
2347 M*/
2348 
2349 EXTERN_C_BEGIN
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatCreate_SeqBAIJ"
2352 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2353 {
2354   PetscErrorCode ierr;
2355   PetscMPIInt    size;
2356   Mat_SeqBAIJ    *b;
2357 
2358   PetscFunctionBegin;
2359   ierr = MPI_Comm_size(B->comm,&size);CHKERRQ(ierr);
2360   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2361 
2362   ierr    = PetscNew(Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2363   B->data = (void*)b;
2364   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2365   B->factor           = 0;
2366   B->mapping          = 0;
2367   b->row              = 0;
2368   b->col              = 0;
2369   b->icol             = 0;
2370   b->reallocs         = 0;
2371   b->saved_values     = 0;
2372 #if defined(PETSC_USE_MAT_SINGLE)
2373   b->setvalueslen     = 0;
2374   b->setvaluescopy    = PETSC_NULL;
2375 #endif
2376 
2377   b->sorted           = PETSC_FALSE;
2378   b->roworiented      = PETSC_TRUE;
2379   b->nonew            = 0;
2380   b->diag             = 0;
2381   b->solve_work       = 0;
2382   b->mult_work        = 0;
2383   B->spptr            = 0;
2384   B->info.nz_unneeded = (PetscReal)b->maxnz;
2385   b->keepzeroedrows   = PETSC_FALSE;
2386   b->xtoy              = 0;
2387   b->XtoY              = 0;
2388   b->compressedrow.use     = PETSC_FALSE;
2389   b->compressedrow.nrows   = 0;
2390   b->compressedrow.i       = PETSC_NULL;
2391   b->compressedrow.rindex  = PETSC_NULL;
2392   b->compressedrow.checked = PETSC_FALSE;
2393   B->same_nonzero          = PETSC_FALSE;
2394 
2395   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2396                                      "MatInvertBlockDiagonal_SeqBAIJ",
2397                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2398   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2399                                      "MatStoreValues_SeqBAIJ",
2400                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2401   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2402                                      "MatRetrieveValues_SeqBAIJ",
2403                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2404   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2405                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2406                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2407   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2408                                      "MatConvert_SeqBAIJ_SeqAIJ",
2409                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2410   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2411                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2412                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2413   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2414                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2415                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2416   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 EXTERN_C_END
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2423 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2424 {
2425   Mat            C;
2426   Mat_SeqBAIJ    *c,*a = (Mat_SeqBAIJ*)A->data;
2427   PetscErrorCode ierr;
2428   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2429 
2430   PetscFunctionBegin;
2431   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2432 
2433   *B = 0;
2434   ierr = MatCreate(A->comm,&C);CHKERRQ(ierr);
2435   ierr = MatSetSizes(C,A->rmap.N,A->cmap.n,A->rmap.N,A->cmap.n);CHKERRQ(ierr);
2436   ierr = MatSetType(C,A->type_name);CHKERRQ(ierr);
2437   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2438   c    = (Mat_SeqBAIJ*)C->data;
2439 
2440   C->rmap.N   = A->rmap.N;
2441   C->cmap.N   = A->cmap.N;
2442   C->rmap.bs  = A->rmap.bs;
2443   c->bs2 = a->bs2;
2444   c->mbs = a->mbs;
2445   c->nbs = a->nbs;
2446 
2447   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2448   for (i=0; i<mbs; i++) {
2449     c->imax[i] = a->imax[i];
2450     c->ilen[i] = a->ilen[i];
2451   }
2452 
2453   /* allocate the matrix space */
2454   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2455   c->singlemalloc = PETSC_TRUE;
2456   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2457   if (mbs > 0) {
2458     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2459     if (cpvalues == MAT_COPY_VALUES) {
2460       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2461     } else {
2462       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2463     }
2464   }
2465   c->sorted      = a->sorted;
2466   c->roworiented = a->roworiented;
2467   c->nonew       = a->nonew;
2468 
2469   if (a->diag) {
2470     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2471     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2472     for (i=0; i<mbs; i++) {
2473       c->diag[i] = a->diag[i];
2474     }
2475   } else c->diag        = 0;
2476   c->nz                 = a->nz;
2477   c->maxnz              = a->maxnz;
2478   c->solve_work         = 0;
2479   c->mult_work          = 0;
2480   c->free_a             = PETSC_TRUE;
2481   c->free_ij            = PETSC_TRUE;
2482   C->preallocated       = PETSC_TRUE;
2483   C->assembled          = PETSC_TRUE;
2484 
2485   c->compressedrow.use     = a->compressedrow.use;
2486   c->compressedrow.nrows   = a->compressedrow.nrows;
2487   c->compressedrow.checked = a->compressedrow.checked;
2488   if ( a->compressedrow.checked && a->compressedrow.use){
2489     i = a->compressedrow.nrows;
2490     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2491     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2492     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2493     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2494   } else {
2495     c->compressedrow.use    = PETSC_FALSE;
2496     c->compressedrow.i      = PETSC_NULL;
2497     c->compressedrow.rindex = PETSC_NULL;
2498   }
2499   C->same_nonzero = A->same_nonzero;
2500   *B = C;
2501   ierr = PetscFListDuplicate(A->qlist,&C->qlist);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatLoad_SeqBAIJ"
2507 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, MatType type,Mat *A)
2508 {
2509   Mat_SeqBAIJ    *a;
2510   Mat            B;
2511   PetscErrorCode ierr;
2512   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2513   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2514   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2515   PetscInt       *masked,nmask,tmp,bs2,ishift;
2516   PetscMPIInt    size;
2517   int            fd;
2518   PetscScalar    *aa;
2519   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2520 
2521   PetscFunctionBegin;
2522   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2523     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2524   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2525   bs2  = bs*bs;
2526 
2527   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2528   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2529   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2530   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2531   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2532   M = header[1]; N = header[2]; nz = header[3];
2533 
2534   if (header[3] < 0) {
2535     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2536   }
2537 
2538   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2539 
2540   /*
2541      This code adds extra rows to make sure the number of rows is
2542     divisible by the blocksize
2543   */
2544   mbs        = M/bs;
2545   extra_rows = bs - M + bs*(mbs);
2546   if (extra_rows == bs) extra_rows = 0;
2547   else                  mbs++;
2548   if (extra_rows) {
2549     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2550   }
2551 
2552   /* read in row lengths */
2553   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2554   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2555   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2556 
2557   /* read in column indices */
2558   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
2559   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
2560   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
2561 
2562   /* loop over row lengths determining block row lengths */
2563   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
2564   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2565   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2566   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
2567   masked   = mask + mbs;
2568   rowcount = 0; nzcount = 0;
2569   for (i=0; i<mbs; i++) {
2570     nmask = 0;
2571     for (j=0; j<bs; j++) {
2572       kmax = rowlengths[rowcount];
2573       for (k=0; k<kmax; k++) {
2574         tmp = jj[nzcount++]/bs;
2575         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
2576       }
2577       rowcount++;
2578     }
2579     browlengths[i] += nmask;
2580     /* zero out the mask elements we set */
2581     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2582   }
2583 
2584   /* create our matrix */
2585   ierr = MatCreate(comm,&B);
2586   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
2587   ierr = MatSetType(B,type);CHKERRQ(ierr);
2588   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
2589   a = (Mat_SeqBAIJ*)B->data;
2590 
2591   /* set matrix "i" values */
2592   a->i[0] = 0;
2593   for (i=1; i<= mbs; i++) {
2594     a->i[i]      = a->i[i-1] + browlengths[i-1];
2595     a->ilen[i-1] = browlengths[i-1];
2596   }
2597   a->nz         = 0;
2598   for (i=0; i<mbs; i++) a->nz += browlengths[i];
2599 
2600   /* read in nonzero values */
2601   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
2602   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
2603   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
2604 
2605   /* set "a" and "j" values into matrix */
2606   nzcount = 0; jcount = 0;
2607   for (i=0; i<mbs; i++) {
2608     nzcountb = nzcount;
2609     nmask    = 0;
2610     for (j=0; j<bs; j++) {
2611       kmax = rowlengths[i*bs+j];
2612       for (k=0; k<kmax; k++) {
2613         tmp = jj[nzcount++]/bs;
2614 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
2615       }
2616     }
2617     /* sort the masked values */
2618     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
2619 
2620     /* set "j" values into matrix */
2621     maskcount = 1;
2622     for (j=0; j<nmask; j++) {
2623       a->j[jcount++]  = masked[j];
2624       mask[masked[j]] = maskcount++;
2625     }
2626     /* set "a" values into matrix */
2627     ishift = bs2*a->i[i];
2628     for (j=0; j<bs; j++) {
2629       kmax = rowlengths[i*bs+j];
2630       for (k=0; k<kmax; k++) {
2631         tmp       = jj[nzcountb]/bs ;
2632         block     = mask[tmp] - 1;
2633         point     = jj[nzcountb] - bs*tmp;
2634         idx       = ishift + bs2*block + j + bs*point;
2635         a->a[idx] = (MatScalar)aa[nzcountb++];
2636       }
2637     }
2638     /* zero out the mask elements we set */
2639     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
2640   }
2641   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
2642 
2643   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2644   ierr = PetscFree(browlengths);CHKERRQ(ierr);
2645   ierr = PetscFree(aa);CHKERRQ(ierr);
2646   ierr = PetscFree(jj);CHKERRQ(ierr);
2647   ierr = PetscFree(mask);CHKERRQ(ierr);
2648 
2649   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2650   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2651   ierr = MatView_Private(B);CHKERRQ(ierr);
2652 
2653   *A = B;
2654   PetscFunctionReturn(0);
2655 }
2656 
2657 #undef __FUNCT__
2658 #define __FUNCT__ "MatCreateSeqBAIJ"
2659 /*@C
2660    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
2661    compressed row) format.  For good matrix assembly performance the
2662    user should preallocate the matrix storage by setting the parameter nz
2663    (or the array nnz).  By setting these parameters accurately, performance
2664    during matrix assembly can be increased by more than a factor of 50.
2665 
2666    Collective on MPI_Comm
2667 
2668    Input Parameters:
2669 +  comm - MPI communicator, set to PETSC_COMM_SELF
2670 .  bs - size of block
2671 .  m - number of rows
2672 .  n - number of columns
2673 .  nz - number of nonzero blocks  per block row (same for all rows)
2674 -  nnz - array containing the number of nonzero blocks in the various block rows
2675          (possibly different for each block row) or PETSC_NULL
2676 
2677    Output Parameter:
2678 .  A - the matrix
2679 
2680    Options Database Keys:
2681 .   -mat_no_unroll - uses code that does not unroll the loops in the
2682                      block calculations (much slower)
2683 .    -mat_block_size - size of the blocks to use
2684 
2685    Level: intermediate
2686 
2687    Notes:
2688    The number of rows and columns must be divisible by blocksize.
2689 
2690    If the nnz parameter is given then the nz parameter is ignored
2691 
2692    A nonzero block is any block that as 1 or more nonzeros in it
2693 
2694    The block AIJ format is fully compatible with standard Fortran 77
2695    storage.  That is, the stored row and column indices can begin at
2696    either one (as in Fortran) or zero.  See the users' manual for details.
2697 
2698    Specify the preallocated storage with either nz or nnz (not both).
2699    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2700    allocation.  For additional details, see the users manual chapter on
2701    matrices.
2702 
2703 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2704 @*/
2705 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
2706 {
2707   PetscErrorCode ierr;
2708 
2709   PetscFunctionBegin;
2710   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2711   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
2712   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
2713   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
2719 /*@C
2720    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
2721    per row in the matrix. For good matrix assembly performance the
2722    user should preallocate the matrix storage by setting the parameter nz
2723    (or the array nnz).  By setting these parameters accurately, performance
2724    during matrix assembly can be increased by more than a factor of 50.
2725 
2726    Collective on MPI_Comm
2727 
2728    Input Parameters:
2729 +  A - the matrix
2730 .  bs - size of block
2731 .  nz - number of block nonzeros per block row (same for all rows)
2732 -  nnz - array containing the number of block nonzeros in the various block rows
2733          (possibly different for each block row) or PETSC_NULL
2734 
2735    Options Database Keys:
2736 .   -mat_no_unroll - uses code that does not unroll the loops in the
2737                      block calculations (much slower)
2738 .    -mat_block_size - size of the blocks to use
2739 
2740    Level: intermediate
2741 
2742    Notes:
2743    If the nnz parameter is given then the nz parameter is ignored
2744 
2745    The block AIJ format is fully compatible with standard Fortran 77
2746    storage.  That is, the stored row and column indices can begin at
2747    either one (as in Fortran) or zero.  See the users' manual for details.
2748 
2749    Specify the preallocated storage with either nz or nnz (not both).
2750    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
2751    allocation.  For additional details, see the users manual chapter on
2752    matrices.
2753 
2754 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2755 @*/
2756 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
2757 {
2758   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
2759 
2760   PetscFunctionBegin;
2761   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
2762   if (f) {
2763     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
2764   }
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 #undef __FUNCT__
2769 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
2770 /*@
2771      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
2772               (upper triangular entries in CSR format) provided by the user.
2773 
2774      Collective on MPI_Comm
2775 
2776    Input Parameters:
2777 +  comm - must be an MPI communicator of size 1
2778 .  bs - size of block
2779 .  m - number of rows
2780 .  n - number of columns
2781 .  i - row indices
2782 .  j - column indices
2783 -  a - matrix values
2784 
2785    Output Parameter:
2786 .  mat - the matrix
2787 
2788    Level: intermediate
2789 
2790    Notes:
2791        The i, j, and a arrays are not copied by this routine, the user must free these arrays
2792     once the matrix is destroyed
2793 
2794        You cannot set new nonzero locations into this matrix, that will generate an error.
2795 
2796        The i and j indices are 0 based
2797 
2798 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
2799 
2800 @*/
2801 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
2802 {
2803   PetscErrorCode ierr;
2804   PetscInt       ii;
2805   Mat_SeqBAIJ    *baij;
2806 
2807   PetscFunctionBegin;
2808   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
2809   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2810 
2811   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2812   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
2813   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
2814   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
2815   baij = (Mat_SeqBAIJ*)(*mat)->data;
2816   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
2817 
2818   baij->i = i;
2819   baij->j = j;
2820   baij->a = a;
2821   baij->singlemalloc = PETSC_FALSE;
2822   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
2823   baij->free_a       = PETSC_FALSE;
2824   baij->free_ij       = PETSC_FALSE;
2825 
2826   for (ii=0; ii<m; ii++) {
2827     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
2828 #if defined(PETSC_USE_DEBUG)
2829     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
2830 #endif
2831   }
2832 #if defined(PETSC_USE_DEBUG)
2833   for (ii=0; ii<baij->i[m]; ii++) {
2834     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
2835     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
2836   }
2837 #endif
2838 
2839   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2840   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2841   PetscFunctionReturn(0);
2842 }
2843