xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d4bf62d159b4eec940d9872c577cf78ea329f539)
1 #define PETSCMAT_DLL
2 
3 /*
4     Defines the basic matrix operations for the BAIJ (compressed row)
5   matrix storage format.
6 */
7 #include "../src/mat/impls/baij/seq/baij.h"
8 #include "petscsys.h"                     /*I "petscmat.h" I*/
9 
10 #include "../src/mat/blockinvert.h"
11 
12 #undef __FUNCT__
13 #define __FUNCT__ "MatSeqBAIJInvertBlockDiagonal"
14 /*@
15   MatSeqBAIJInvertBlockDiagonal - Inverts the block diagonal entries.
16 
17   Collective on Mat
18 
19   Input Parameters:
20 . mat - the matrix
21 
22   Level: advanced
23 @*/
24 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJInvertBlockDiagonal(Mat mat)
25 {
26   PetscErrorCode ierr,(*f)(Mat);
27 
28   PetscFunctionBegin;
29   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
30   if (!mat->assembled) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for unassembled matrix");
31   if (mat->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
32 
33   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJInvertBlockDiagonal_C",(void (**)(void))&f);CHKERRQ(ierr);
34   if (f) {
35     ierr = (*f)(mat);CHKERRQ(ierr);
36   } else {
37     SETERRQ(PETSC_ERR_SUP,"Currently only implemented for SeqBAIJ.");
38   }
39   PetscFunctionReturn(0);
40 }
41 
42 EXTERN_C_BEGIN
43 #undef __FUNCT__
44 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
45 PetscErrorCode PETSCMAT_DLLEXPORT MatInvertBlockDiagonal_SeqBAIJ(Mat A)
46 {
47   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
48   PetscErrorCode ierr;
49   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs;
50   MatScalar      *v = a->a,*odiag,*diag,*mdiag;
51   PetscReal      shift = 0.0;
52 
53   PetscFunctionBegin;
54   if (a->idiagvalid) PetscFunctionReturn(0);
55   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
56   diag_offset = a->diag;
57   if (!a->idiag) {
58     ierr = PetscMalloc(2*bs*bs*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
59   }
60   diag  = a->idiag;
61   mdiag = a->idiag+bs*bs*mbs;
62   /* factor and invert each block */
63   switch (bs){
64     case 2:
65       for (i=0; i<mbs; i++) {
66         odiag   = v + 4*diag_offset[i];
67         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68 	mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
69 	ierr     = Kernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
70 	diag    += 4;
71 	mdiag   += 4;
72       }
73       break;
74     case 3:
75       for (i=0; i<mbs; i++) {
76         odiag    = v + 9*diag_offset[i];
77         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79         diag[8]  = odiag[8];
80         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
81         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
82         mdiag[8] = odiag[8];
83 	ierr     = Kernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
84 	diag    += 9;
85 	mdiag   += 9;
86       }
87       break;
88     case 4:
89       for (i=0; i<mbs; i++) {
90         odiag  = v + 16*diag_offset[i];
91         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
92         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
93 	ierr   = Kernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
94 	diag  += 16;
95 	mdiag += 16;
96       }
97       break;
98     case 5:
99       for (i=0; i<mbs; i++) {
100         odiag = v + 25*diag_offset[i];
101         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
102         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
103 	ierr   = Kernel_A_gets_inverse_A_5(diag,shift);CHKERRQ(ierr);
104 	diag  += 25;
105 	mdiag += 25;
106       }
107       break;
108     case 6:
109       for (i=0; i<mbs; i++) {
110         odiag = v + 36*diag_offset[i];
111         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
112         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
113 	ierr   = Kernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
114 	diag  += 36;
115 	mdiag += 36;
116       }
117       break;
118     default:
119       SETERRQ1(PETSC_ERR_SUP,"not supported for block size %D",bs);
120   }
121   a->idiagvalid = PETSC_TRUE;
122   PetscFunctionReturn(0);
123 }
124 EXTERN_C_END
125 
126 #undef __FUNCT__
127 #define __FUNCT__ "MatPBRelax_SeqBAIJ_1"
128 PetscErrorCode MatPBRelax_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
129 {
130   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
131   PetscScalar        *x,x1,s1;
132   const PetscScalar  *b;
133   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
134   PetscErrorCode     ierr;
135   PetscInt           m = a->mbs,i,i2,nz,idx;
136   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
137 
138   PetscFunctionBegin;
139   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
140   its = its*lits;
141   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
142   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
143   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
144   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
145   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
146 
147   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
148 
149   diag  = a->diag;
150   idiag = a->idiag;
151   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
152   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
153 
154   if (flag & SOR_ZERO_INITIAL_GUESS) {
155     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
156       x[0] = b[0]*idiag[0];
157       i2     = 1;
158       idiag += 1;
159       for (i=1; i<m; i++) {
160         v     = aa + ai[i];
161         vi    = aj + ai[i];
162         nz    = diag[i] - ai[i];
163         s1    = b[i2];
164         while (nz--) {
165           idx  = (*vi++);
166           x1   = x[idx];
167           s1  -= v[0]*x1;
168           v   += 1;
169         }
170         x[i2]   = idiag[0]*s1;
171         idiag   += 1;
172         i2      += 1;
173       }
174       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
175       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
176     }
177     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
178         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
179       i2    = 0;
180       mdiag = a->idiag+a->mbs;
181       for (i=0; i<m; i++) {
182         x1      = x[i2];
183         x[i2]   = mdiag[0]*x1;
184         mdiag  += 1;
185         i2     += 1;
186       }
187       ierr = PetscLogFlops(m);CHKERRQ(ierr);
188     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
189       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
190     }
191     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
192       idiag   = a->idiag+a->mbs - 1;
193       i2      = m - 1;
194       x1      = x[i2];
195       x[i2]   = idiag[0]*x1;
196       idiag -= 1;
197       i2    -= 1;
198       for (i=m-2; i>=0; i--) {
199         v     = aa + (diag[i]+1);
200         vi    = aj + diag[i] + 1;
201         nz    = ai[i+1] - diag[i] - 1;
202         s1    = x[i2];
203         while (nz--) {
204           idx  = (*vi++);
205           x1   = x[idx];
206           s1  -= v[0]*x1;
207           v   += 1;
208         }
209         x[i2]   = idiag[0]*s1;
210         idiag   -= 2;
211         i2      -= 1;
212       }
213       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
214     }
215   } else {
216     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
217   }
218   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
219   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
220   PetscFunctionReturn(0);
221 }
222 
223 #undef __FUNCT__
224 #define __FUNCT__ "MatPBRelax_SeqBAIJ_2"
225 PetscErrorCode MatPBRelax_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
226 {
227   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
228   PetscScalar        *x,x1,x2,s1,s2;
229   const PetscScalar  *b;
230   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
231   PetscErrorCode     ierr;
232   PetscInt           m = a->mbs,i,i2,nz,idx;
233   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
234 
235   PetscFunctionBegin;
236   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
237   its = its*lits;
238   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
239   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
240   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
241   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
242   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
243 
244   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
245 
246   diag  = a->diag;
247   idiag = a->idiag;
248   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
249   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
250 
251   if (flag & SOR_ZERO_INITIAL_GUESS) {
252     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
253       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
254       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
255       i2     = 2;
256       idiag += 4;
257       for (i=1; i<m; i++) {
258 	v     = aa + 4*ai[i];
259 	vi    = aj + ai[i];
260 	nz    = diag[i] - ai[i];
261 	s1    = b[i2]; s2 = b[i2+1];
262 	while (nz--) {
263 	  idx  = 2*(*vi++);
264 	  x1   = x[idx]; x2 = x[1+idx];
265 	  s1  -= v[0]*x1 + v[2]*x2;
266 	  s2  -= v[1]*x1 + v[3]*x2;
267 	  v   += 4;
268 	}
269 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
270 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
271         idiag   += 4;
272         i2      += 2;
273       }
274       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
275       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
276     }
277     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
278         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
279       i2    = 0;
280       mdiag = a->idiag+4*a->mbs;
281       for (i=0; i<m; i++) {
282         x1      = x[i2]; x2 = x[i2+1];
283         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
284         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
285         mdiag  += 4;
286         i2     += 2;
287       }
288       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
289     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
290       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
291     }
292     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
293       idiag   = a->idiag+4*a->mbs - 4;
294       i2      = 2*m - 2;
295       x1      = x[i2]; x2 = x[i2+1];
296       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
297       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
298       idiag -= 4;
299       i2    -= 2;
300       for (i=m-2; i>=0; i--) {
301 	v     = aa + 4*(diag[i]+1);
302 	vi    = aj + diag[i] + 1;
303 	nz    = ai[i+1] - diag[i] - 1;
304 	s1    = x[i2]; s2 = x[i2+1];
305 	while (nz--) {
306 	  idx  = 2*(*vi++);
307 	  x1   = x[idx]; x2 = x[1+idx];
308 	  s1  -= v[0]*x1 + v[2]*x2;
309 	  s2  -= v[1]*x1 + v[3]*x2;
310 	  v   += 4;
311 	}
312 	x[i2]   = idiag[0]*s1 + idiag[2]*s2;
313 	x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
314         idiag   -= 4;
315         i2      -= 2;
316       }
317       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
318     }
319   } else {
320     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
321   }
322   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
323   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
324   PetscFunctionReturn(0);
325 }
326 
327 #undef __FUNCT__
328 #define __FUNCT__ "MatPBRelax_SeqBAIJ_3"
329 PetscErrorCode MatPBRelax_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
330 {
331   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
332   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
333   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
334   const PetscScalar  *b;
335   PetscErrorCode     ierr;
336   PetscInt           m = a->mbs,i,i2,nz,idx;
337   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
338 
339   PetscFunctionBegin;
340   its = its*lits;
341   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
342   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
343   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
344   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
345   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
346   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
347 
348   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
349 
350   diag  = a->diag;
351   idiag = a->idiag;
352   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
353   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
354 
355   if (flag & SOR_ZERO_INITIAL_GUESS) {
356     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
357       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
358       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
359       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
360       i2     = 3;
361       idiag += 9;
362       for (i=1; i<m; i++) {
363         v     = aa + 9*ai[i];
364         vi    = aj + ai[i];
365         nz    = diag[i] - ai[i];
366         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
367         while (nz--) {
368           idx  = 3*(*vi++);
369           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
370           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
371           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
372           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
373           v   += 9;
374         }
375         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
376         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
377         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
378         idiag   += 9;
379         i2      += 3;
380       }
381       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
382       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
383     }
384     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
385         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
386       i2    = 0;
387       mdiag = a->idiag+9*a->mbs;
388       for (i=0; i<m; i++) {
389         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
390         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
391         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
392         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
393         mdiag  += 9;
394         i2     += 3;
395       }
396       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
397     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
398       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
399     }
400     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
401       idiag   = a->idiag+9*a->mbs - 9;
402       i2      = 3*m - 3;
403       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
404       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
405       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
406       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
407       idiag -= 9;
408       i2    -= 3;
409       for (i=m-2; i>=0; i--) {
410         v     = aa + 9*(diag[i]+1);
411         vi    = aj + diag[i] + 1;
412         nz    = ai[i+1] - diag[i] - 1;
413         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
414         while (nz--) {
415           idx  = 3*(*vi++);
416           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
417           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
418           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
419           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
420           v   += 9;
421         }
422         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
423         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
424         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
425         idiag   -= 9;
426         i2      -= 3;
427       }
428       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
429     }
430   } else {
431     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
432   }
433   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
434   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
435   PetscFunctionReturn(0);
436 }
437 
438 #undef __FUNCT__
439 #define __FUNCT__ "MatPBRelax_SeqBAIJ_4"
440 PetscErrorCode MatPBRelax_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
441 {
442   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
443   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
444   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
445   const PetscScalar  *b;
446   PetscErrorCode     ierr;
447   PetscInt           m = a->mbs,i,i2,nz,idx;
448   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
449 
450   PetscFunctionBegin;
451   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
452   its = its*lits;
453   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
454   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
455   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
456   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
457   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
458 
459   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
460 
461   diag  = a->diag;
462   idiag = a->idiag;
463   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
464   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
465 
466   if (flag & SOR_ZERO_INITIAL_GUESS) {
467     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
468       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
469       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
470       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
471       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
472       i2     = 4;
473       idiag += 16;
474       for (i=1; i<m; i++) {
475 	v     = aa + 16*ai[i];
476 	vi    = aj + ai[i];
477 	nz    = diag[i] - ai[i];
478 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
479 	while (nz--) {
480 	  idx  = 4*(*vi++);
481 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
482 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
483 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
484 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
485 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
486 	  v   += 16;
487 	}
488 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
489 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
490 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
491 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
492         idiag   += 16;
493         i2      += 4;
494       }
495       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
496       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
497     }
498     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
499         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
500       i2    = 0;
501       mdiag = a->idiag+16*a->mbs;
502       for (i=0; i<m; i++) {
503         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
504         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
505         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
506         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
507         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
508         mdiag  += 16;
509         i2     += 4;
510       }
511       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
512     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
513       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
514     }
515     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
516       idiag   = a->idiag+16*a->mbs - 16;
517       i2      = 4*m - 4;
518       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
519       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
520       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
521       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
522       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
523       idiag -= 16;
524       i2    -= 4;
525       for (i=m-2; i>=0; i--) {
526 	v     = aa + 16*(diag[i]+1);
527 	vi    = aj + diag[i] + 1;
528 	nz    = ai[i+1] - diag[i] - 1;
529 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
530 	while (nz--) {
531 	  idx  = 4*(*vi++);
532 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
533 	  s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
534 	  s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
535 	  s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
536 	  s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
537 	  v   += 16;
538 	}
539 	x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
540 	x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
541 	x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
542 	x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
543         idiag   -= 16;
544         i2      -= 4;
545       }
546       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
547     }
548   } else {
549     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
550   }
551   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
552   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
553   PetscFunctionReturn(0);
554 }
555 
556 #undef __FUNCT__
557 #define __FUNCT__ "MatPBRelax_SeqBAIJ_5"
558 PetscErrorCode MatPBRelax_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
559 {
560   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
561   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
562   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
563   const PetscScalar  *b;
564   PetscErrorCode     ierr;
565   PetscInt           m = a->mbs,i,i2,nz,idx;
566   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
567 
568   PetscFunctionBegin;
569   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
570   its = its*lits;
571   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
572   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
573   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
574   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
575   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
576 
577   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
578 
579   diag  = a->diag;
580   idiag = a->idiag;
581   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
582   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
583 
584   if (flag & SOR_ZERO_INITIAL_GUESS) {
585     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
586       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
587       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
588       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
589       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
590       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
591       i2     = 5;
592       idiag += 25;
593       for (i=1; i<m; i++) {
594 	v     = aa + 25*ai[i];
595 	vi    = aj + ai[i];
596 	nz    = diag[i] - ai[i];
597 	s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
598 	while (nz--) {
599 	  idx  = 5*(*vi++);
600 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
601 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
602 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
603 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
604 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
605 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
606 	  v   += 25;
607 	}
608 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
609 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
610 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
611 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
612 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
613         idiag   += 25;
614         i2      += 5;
615       }
616       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
617       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
618     }
619     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
620         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
621       i2    = 0;
622       mdiag = a->idiag+25*a->mbs;
623       for (i=0; i<m; i++) {
624         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
625         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
626         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
627         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
628         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
629         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
630         mdiag  += 25;
631         i2     += 5;
632       }
633       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
634     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
635       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
636     }
637     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
638       idiag   = a->idiag+25*a->mbs - 25;
639       i2      = 5*m - 5;
640       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
641       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
642       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
643       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
644       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
645       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
646       idiag -= 25;
647       i2    -= 5;
648       for (i=m-2; i>=0; i--) {
649 	v     = aa + 25*(diag[i]+1);
650 	vi    = aj + diag[i] + 1;
651 	nz    = ai[i+1] - diag[i] - 1;
652 	s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
653 	while (nz--) {
654 	  idx  = 5*(*vi++);
655 	  x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
656 	  s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
657 	  s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
658 	  s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
659 	  s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
660 	  s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
661 	  v   += 25;
662 	}
663 	x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
664 	x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
665 	x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
666 	x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
667 	x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
668         idiag   -= 25;
669         i2      -= 5;
670       }
671       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
672     }
673   } else {
674     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
675   }
676   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
677   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
678   PetscFunctionReturn(0);
679 }
680 
681 #undef __FUNCT__
682 #define __FUNCT__ "MatPBRelax_SeqBAIJ_6"
683 PetscErrorCode MatPBRelax_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
684 {
685   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
686   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
687   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
688   const PetscScalar  *b;
689   PetscErrorCode     ierr;
690   PetscInt           m = a->mbs,i,i2,nz,idx;
691   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
692 
693   PetscFunctionBegin;
694   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
695   its = its*lits;
696   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
697   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
698   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
699   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
700   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
701 
702   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
703 
704   diag  = a->diag;
705   idiag = a->idiag;
706   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
707   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
708 
709   if (flag & SOR_ZERO_INITIAL_GUESS) {
710     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
711       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
712       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
713       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
714       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
715       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
716       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
717       i2     = 6;
718       idiag += 36;
719       for (i=1; i<m; i++) {
720         v     = aa + 36*ai[i];
721         vi    = aj + ai[i];
722         nz    = diag[i] - ai[i];
723         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
724         while (nz--) {
725           idx  = 6*(*vi++);
726           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
727           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
728           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
729           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
730           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
731           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
732           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
733           v   += 36;
734         }
735         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
736         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
737         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
738         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
739         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
740         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
741         idiag   += 36;
742         i2      += 6;
743       }
744       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
745       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
746     }
747     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
748         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
749       i2    = 0;
750       mdiag = a->idiag+36*a->mbs;
751       for (i=0; i<m; i++) {
752         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
753         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
754         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
755         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
756         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
757         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
758         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
759         mdiag  += 36;
760         i2     += 6;
761       }
762       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
763     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
764       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
765     }
766     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
767       idiag   = a->idiag+36*a->mbs - 36;
768       i2      = 6*m - 6;
769       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
770       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
771       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
772       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
773       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
774       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
775       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
776       idiag -= 36;
777       i2    -= 6;
778       for (i=m-2; i>=0; i--) {
779         v     = aa + 36*(diag[i]+1);
780         vi    = aj + diag[i] + 1;
781         nz    = ai[i+1] - diag[i] - 1;
782         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
783         while (nz--) {
784           idx  = 6*(*vi++);
785           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
786           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
787           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
788           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
789           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
790           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
791           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
792           v   += 36;
793         }
794         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
795         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
796         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
797         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
798         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
799         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
800         idiag   -= 36;
801         i2      -= 6;
802       }
803       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
804     }
805   } else {
806     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
807   }
808   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
809   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
810   PetscFunctionReturn(0);
811 }
812 
813 #undef __FUNCT__
814 #define __FUNCT__ "MatPBRelax_SeqBAIJ_7"
815 PetscErrorCode MatPBRelax_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
816 {
817   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
818   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
819   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
820   const PetscScalar  *b;
821   PetscErrorCode     ierr;
822   PetscInt           m = a->mbs,i,i2,nz,idx;
823   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
824 
825   PetscFunctionBegin;
826   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_ERR_SUP,"No support yet for Eisenstat");
827   its = its*lits;
828   if (its <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
829   if (fshift) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
830   if (omega != 1.0) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
831   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
832   if (its > 1) SETERRQ(PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
833 
834   if (!a->idiagvalid){ierr = MatInvertBlockDiagonal_SeqBAIJ(A);CHKERRQ(ierr);}
835 
836   diag  = a->diag;
837   idiag = a->idiag;
838   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
839   ierr = VecGetArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
840 
841   if (flag & SOR_ZERO_INITIAL_GUESS) {
842     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP){
843       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
844       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
845       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
846       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
847       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
848       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
849       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
850       i2     = 7;
851       idiag += 49;
852       for (i=1; i<m; i++) {
853         v     = aa + 49*ai[i];
854         vi    = aj + ai[i];
855         nz    = diag[i] - ai[i];
856         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
857         while (nz--) {
858           idx  = 7*(*vi++);
859           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
860           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
861           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
862           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
863           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
864           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
865           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
866           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
867           v   += 49;
868         }
869         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
870         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
871         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
872         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
873         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
874         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
875         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
876         idiag   += 49;
877         i2      += 7;
878       }
879       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
880       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
881     }
882     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
883         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
884       i2    = 0;
885       mdiag = a->idiag+49*a->mbs;
886       for (i=0; i<m; i++) {
887         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
888         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[35]*x7;
889         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[36]*x7;
890         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[37]*x7;
891         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[38]*x7;
892         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[39]*x7;
893         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[40]*x7;
894         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[41]*x7;
895         mdiag  += 36;
896         i2     += 6;
897       }
898       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
899     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
900       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
901     }
902     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP){
903       idiag   = a->idiag+49*a->mbs - 49;
904       i2      = 7*m - 7;
905       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
906       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
907       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
908       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
909       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
910       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
911       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
912       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
913       idiag -= 49;
914       i2    -= 7;
915       for (i=m-2; i>=0; i--) {
916         v     = aa + 49*(diag[i]+1);
917         vi    = aj + diag[i] + 1;
918         nz    = ai[i+1] - diag[i] - 1;
919         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
920         while (nz--) {
921           idx  = 7*(*vi++);
922           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
923           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
924           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
925           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
926           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
927           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
928           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
929           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
930           v   += 49;
931         }
932         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
933         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
934         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
935         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
936         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
937         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
938         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
939         idiag   -= 49;
940         i2      -= 7;
941       }
942       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
943     }
944   } else {
945     SETERRQ(PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
946   }
947   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
948   ierr = VecRestoreArray(bb,(PetscScalar**)&b);CHKERRQ(ierr);
949   PetscFunctionReturn(0);
950 }
951 
952 /*
953     Special version for direct calls from Fortran (Used in PETSc-fun3d)
954 */
955 #if defined(PETSC_HAVE_FORTRAN_CAPS)
956 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
957 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
958 #define matsetvaluesblocked4_ matsetvaluesblocked4
959 #endif
960 
961 EXTERN_C_BEGIN
962 #undef __FUNCT__
963 #define __FUNCT__ "matsetvaluesblocked4_"
964 void PETSCMAT_DLLEXPORT matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
965 {
966   Mat               A = *AA;
967   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
968   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
969   PetscInt          *ai=a->i,*ailen=a->ilen;
970   PetscInt          *aj=a->j,stepval,lastcol = -1;
971   const PetscScalar *value = v;
972   MatScalar         *ap,*aa = a->a,*bap;
973 
974   PetscFunctionBegin;
975   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
976   stepval = (n-1)*4;
977   for (k=0; k<m; k++) { /* loop over added rows */
978     row  = im[k];
979     rp   = aj + ai[row];
980     ap   = aa + 16*ai[row];
981     nrow = ailen[row];
982     low  = 0;
983     high = nrow;
984     for (l=0; l<n; l++) { /* loop over added columns */
985       col = in[l];
986       if (col <= lastcol) low = 0; else high = nrow;
987       lastcol = col;
988       value = v + k*(stepval+4 + l)*4;
989       while (high-low > 7) {
990         t = (low+high)/2;
991         if (rp[t] > col) high = t;
992         else             low  = t;
993       }
994       for (i=low; i<high; i++) {
995         if (rp[i] > col) break;
996         if (rp[i] == col) {
997           bap  = ap +  16*i;
998           for (ii=0; ii<4; ii++,value+=stepval) {
999             for (jj=ii; jj<16; jj+=4) {
1000               bap[jj] += *value++;
1001             }
1002           }
1003           goto noinsert2;
1004         }
1005       }
1006       N = nrow++ - 1;
1007       high++; /* added new column index thus must search to one higher than before */
1008       /* shift up all the later entries in this row */
1009       for (ii=N; ii>=i; ii--) {
1010         rp[ii+1] = rp[ii];
1011         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1012       }
1013       if (N >= i) {
1014         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1015       }
1016       rp[i] = col;
1017       bap   = ap +  16*i;
1018       for (ii=0; ii<4; ii++,value+=stepval) {
1019         for (jj=ii; jj<16; jj+=4) {
1020           bap[jj] = *value++;
1021         }
1022       }
1023       noinsert2:;
1024       low = i;
1025     }
1026     ailen[row] = nrow;
1027   }
1028   PetscFunctionReturnVoid();
1029 }
1030 EXTERN_C_END
1031 
1032 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1033 #define matsetvalues4_ MATSETVALUES4
1034 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1035 #define matsetvalues4_ matsetvalues4
1036 #endif
1037 
1038 EXTERN_C_BEGIN
1039 #undef __FUNCT__
1040 #define __FUNCT__ "MatSetValues4_"
1041 void PETSCMAT_DLLEXPORT matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1042 {
1043   Mat         A = *AA;
1044   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1045   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1046   PetscInt    *ai=a->i,*ailen=a->ilen;
1047   PetscInt    *aj=a->j,brow,bcol;
1048   PetscInt    ridx,cidx,lastcol = -1;
1049   MatScalar   *ap,value,*aa=a->a,*bap;
1050 
1051   PetscFunctionBegin;
1052   for (k=0; k<m; k++) { /* loop over added rows */
1053     row  = im[k]; brow = row/4;
1054     rp   = aj + ai[brow];
1055     ap   = aa + 16*ai[brow];
1056     nrow = ailen[brow];
1057     low  = 0;
1058     high = nrow;
1059     for (l=0; l<n; l++) { /* loop over added columns */
1060       col = in[l]; bcol = col/4;
1061       ridx = row % 4; cidx = col % 4;
1062       value = v[l + k*n];
1063       if (col <= lastcol) low = 0; else high = nrow;
1064       lastcol = col;
1065       while (high-low > 7) {
1066         t = (low+high)/2;
1067         if (rp[t] > bcol) high = t;
1068         else              low  = t;
1069       }
1070       for (i=low; i<high; i++) {
1071         if (rp[i] > bcol) break;
1072         if (rp[i] == bcol) {
1073           bap  = ap +  16*i + 4*cidx + ridx;
1074           *bap += value;
1075           goto noinsert1;
1076         }
1077       }
1078       N = nrow++ - 1;
1079       high++; /* added new column thus must search to one higher than before */
1080       /* shift up all the later entries in this row */
1081       for (ii=N; ii>=i; ii--) {
1082         rp[ii+1] = rp[ii];
1083         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1084       }
1085       if (N>=i) {
1086         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1087       }
1088       rp[i]                    = bcol;
1089       ap[16*i + 4*cidx + ridx] = value;
1090       noinsert1:;
1091       low = i;
1092     }
1093     ailen[brow] = nrow;
1094   }
1095   PetscFunctionReturnVoid();
1096 }
1097 EXTERN_C_END
1098 
1099 /*
1100      Checks for missing diagonals
1101 */
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1104 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscTruth *missing,PetscInt *d)
1105 {
1106   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108   PetscInt       *diag,*jj = a->j,i;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1112   *missing = PETSC_FALSE;
1113   if (A->rmap->n > 0 && !jj) {
1114     *missing  = PETSC_TRUE;
1115     if (d) *d = 0;
1116     PetscInfo(A,"Matrix has no entries therefor is missing diagonal");
1117   } else {
1118     diag     = a->diag;
1119     for (i=0; i<a->mbs; i++) {
1120       if (jj[diag[i]] != i) {
1121         *missing  = PETSC_TRUE;
1122         if (d) *d = i;
1123         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1124       }
1125     }
1126   }
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1132 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1133 {
1134   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1135   PetscErrorCode ierr;
1136   PetscInt       i,j,m = a->mbs;
1137 
1138   PetscFunctionBegin;
1139   if (!a->diag) {
1140     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1141   }
1142   for (i=0; i<m; i++) {
1143     a->diag[i] = a->i[i+1];
1144     for (j=a->i[i]; j<a->i[i+1]; j++) {
1145       if (a->j[j] == i) {
1146         a->diag[i] = j;
1147         break;
1148       }
1149     }
1150   }
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 
1155 EXTERN PetscErrorCode MatToSymmetricIJ_SeqAIJ(PetscInt,PetscInt*,PetscInt*,PetscInt,PetscInt,PetscInt**,PetscInt**);
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1159 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1160 {
1161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1162   PetscErrorCode ierr;
1163   PetscInt       i,j,n = a->mbs,nz = a->i[n],bs = A->rmap->bs,nbs = 1,k,l,cnt;
1164   PetscInt       *tia, *tja;
1165 
1166   PetscFunctionBegin;
1167   *nn = n;
1168   if (!ia) PetscFunctionReturn(0);
1169   if (symmetric) {
1170     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1171   } else {
1172     tia = a->i; tja = a->j;
1173   }
1174 
1175   if (!blockcompressed && bs > 1) {
1176     (*nn) *= bs;
1177     nbs    = bs;
1178     /* malloc & create the natural set of indices */
1179     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1180     if (n) {
1181       (*ia)[0] = 0;
1182       for (j=1; j<bs; j++) {
1183         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1184       }
1185     }
1186 
1187     for (i=1; i<n; i++) {
1188       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1189       for (j=1; j<bs; j++) {
1190         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1191       }
1192     }
1193     if (n) {
1194       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1195     }
1196 
1197     if (ja) {
1198       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1199       cnt = 0;
1200       for (i=0; i<n; i++) {
1201         for (j=0; j<bs; j++) {
1202           for (k=tia[i]; k<tia[i+1]; k++) {
1203             for (l=0; l<bs; l++) {
1204               (*ja)[cnt++] = bs*tja[k] + l;
1205 	    }
1206           }
1207         }
1208       }
1209     }
1210 
1211     n     *= bs;
1212     nz *= bs*bs;
1213     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1214       ierr = PetscFree(tia);CHKERRQ(ierr);
1215       ierr = PetscFree(tja);CHKERRQ(ierr);
1216     }
1217   } else {
1218     *ia = tia;
1219     if (ja) *ja = tja;
1220   }
1221   if (oshift == 1) {
1222     for (i=0; i<n+nbs; i++) (*ia)[i]++;
1223     if (ja) for (i=0; i<nz; i++) (*ja)[i]++;
1224   }
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1230 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscTruth symmetric,PetscTruth blockcompressed,PetscInt *nn,PetscInt *ia[],PetscInt *ja[],PetscTruth *done)
1231 {
1232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1233   PetscErrorCode ierr;
1234   PetscInt       i,n = a->mbs,nz = a->i[n];
1235 
1236   PetscFunctionBegin;
1237   if (!ia) PetscFunctionReturn(0);
1238   if (!blockcompressed && A->rmap->bs > 1) {
1239     ierr = PetscFree(*ia);CHKERRQ(ierr);
1240     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1241   } else if (symmetric) {
1242     ierr = PetscFree(*ia);CHKERRQ(ierr);
1243     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1244   } else if (oshift == 1) { /* blockcompressed */
1245     for (i=0; i<n+1; i++) a->i[i]--;
1246     if (ja) {for (i=0; i<nz; i++) a->j[i]--;}
1247   }
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1253 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1254 {
1255   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1256   PetscErrorCode ierr;
1257 
1258   PetscFunctionBegin;
1259 #if defined(PETSC_USE_LOG)
1260   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1261 #endif
1262   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1263   if (a->row) {
1264     ierr = ISDestroy(a->row);CHKERRQ(ierr);
1265   }
1266   if (a->col) {
1267     ierr = ISDestroy(a->col);CHKERRQ(ierr);
1268   }
1269   ierr = PetscFree(a->diag);CHKERRQ(ierr);
1270   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1271   ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1272   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1273   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1274   if (a->icol) {ierr = ISDestroy(a->icol);CHKERRQ(ierr);}
1275   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1276   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1277   if (a->compressedrow.use){ierr = PetscFree(a->compressedrow.i);}
1278 
1279   if (a->sbaijMat) {ierr = MatDestroy(a->sbaijMat);CHKERRQ(ierr);}
1280   ierr = PetscFree(a);CHKERRQ(ierr);
1281 
1282   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1283   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1284   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1285   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1286   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1287   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1288   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1289   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1290   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1291   PetscFunctionReturn(0);
1292 }
1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1296 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscTruth flg)
1297 {
1298   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1299   PetscErrorCode ierr;
1300 
1301   PetscFunctionBegin;
1302   switch (op) {
1303   case MAT_ROW_ORIENTED:
1304     a->roworiented    = flg;
1305     break;
1306   case MAT_KEEP_NONZERO_PATTERN:
1307     a->keepnonzeropattern = flg;
1308     break;
1309   case MAT_NEW_NONZERO_LOCATIONS:
1310     a->nonew          = (flg ? 0 : 1);
1311     break;
1312   case MAT_NEW_NONZERO_LOCATION_ERR:
1313     a->nonew          = (flg ? -1 : 0);
1314     break;
1315   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1316     a->nonew          = (flg ? -2 : 0);
1317     break;
1318   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1319     a->nounused       = (flg ? -1 : 0);
1320     break;
1321   case MAT_NEW_DIAGONALS:
1322   case MAT_IGNORE_OFF_PROC_ENTRIES:
1323   case MAT_USE_HASH_TABLE:
1324     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1325     break;
1326   case MAT_SYMMETRIC:
1327   case MAT_STRUCTURALLY_SYMMETRIC:
1328   case MAT_HERMITIAN:
1329   case MAT_SYMMETRY_ETERNAL:
1330     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1331     break;
1332   default:
1333     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1334   }
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1340 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1345   MatScalar      *aa,*aa_i;
1346   PetscScalar    *v_i;
1347 
1348   PetscFunctionBegin;
1349   bs  = A->rmap->bs;
1350   ai  = a->i;
1351   aj  = a->j;
1352   aa  = a->a;
1353   bs2 = a->bs2;
1354 
1355   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1356 
1357   bn  = row/bs;   /* Block number */
1358   bp  = row % bs; /* Block Position */
1359   M   = ai[bn+1] - ai[bn];
1360   *nz = bs*M;
1361 
1362   if (v) {
1363     *v = 0;
1364     if (*nz) {
1365       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1366       for (i=0; i<M; i++) { /* for each block in the block row */
1367         v_i  = *v + i*bs;
1368         aa_i = aa + bs2*(ai[bn] + i);
1369         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1370       }
1371     }
1372   }
1373 
1374   if (idx) {
1375     *idx = 0;
1376     if (*nz) {
1377       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1378       for (i=0; i<M; i++) { /* for each block in the block row */
1379         idx_i = *idx + i*bs;
1380         itmp  = bs*aj[ai[bn] + i];
1381         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1382       }
1383     }
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1390 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1391 {
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1396   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1402 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1403 {
1404   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1405   Mat            C;
1406   PetscErrorCode ierr;
1407   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1408   PetscInt       *rows,*cols,bs2=a->bs2;
1409   MatScalar      *array;
1410 
1411   PetscFunctionBegin;
1412   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1413   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1414     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1415     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1416 
1417     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1418     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1419     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1420     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1421     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,PETSC_NULL,col);CHKERRQ(ierr);
1422     ierr = PetscFree(col);CHKERRQ(ierr);
1423   } else {
1424     C = *B;
1425   }
1426 
1427   array = a->a;
1428   ierr = PetscMalloc(2*bs*sizeof(PetscInt),&rows);CHKERRQ(ierr);
1429   cols = rows + bs;
1430   for (i=0; i<mbs; i++) {
1431     cols[0] = i*bs;
1432     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1433     len = ai[i+1] - ai[i];
1434     for (j=0; j<len; j++) {
1435       rows[0] = (*aj++)*bs;
1436       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1437       ierr = MatSetValues(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1438       array += bs2;
1439     }
1440   }
1441   ierr = PetscFree(rows);CHKERRQ(ierr);
1442 
1443   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1444   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1445 
1446   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1447     *B = C;
1448   } else {
1449     ierr = MatHeaderCopy(A,C);CHKERRQ(ierr);
1450   }
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 #undef __FUNCT__
1455 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1456 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1457 {
1458   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1459   PetscErrorCode ierr;
1460   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1461   int            fd;
1462   PetscScalar    *aa;
1463   FILE           *file;
1464 
1465   PetscFunctionBegin;
1466   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1467   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1468   col_lens[0] = MAT_FILE_COOKIE;
1469 
1470   col_lens[1] = A->rmap->N;
1471   col_lens[2] = A->cmap->n;
1472   col_lens[3] = a->nz*bs2;
1473 
1474   /* store lengths of each row and write (including header) to file */
1475   count = 0;
1476   for (i=0; i<a->mbs; i++) {
1477     for (j=0; j<bs; j++) {
1478       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1479     }
1480   }
1481   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1482   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1483 
1484   /* store column indices (zero start index) */
1485   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1486   count = 0;
1487   for (i=0; i<a->mbs; i++) {
1488     for (j=0; j<bs; j++) {
1489       for (k=a->i[i]; k<a->i[i+1]; k++) {
1490         for (l=0; l<bs; l++) {
1491           jj[count++] = bs*a->j[k] + l;
1492         }
1493       }
1494     }
1495   }
1496   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1497   ierr = PetscFree(jj);CHKERRQ(ierr);
1498 
1499   /* store nonzero values */
1500   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1501   count = 0;
1502   for (i=0; i<a->mbs; i++) {
1503     for (j=0; j<bs; j++) {
1504       for (k=a->i[i]; k<a->i[i+1]; k++) {
1505         for (l=0; l<bs; l++) {
1506           aa[count++] = a->a[bs2*k + l*bs + j];
1507         }
1508       }
1509     }
1510   }
1511   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1512   ierr = PetscFree(aa);CHKERRQ(ierr);
1513 
1514   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1515   if (file) {
1516     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1517   }
1518   PetscFunctionReturn(0);
1519 }
1520 
1521 #undef __FUNCT__
1522 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1523 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1524 {
1525   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1526   PetscErrorCode    ierr;
1527   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1528   PetscViewerFormat format;
1529 
1530   PetscFunctionBegin;
1531   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1532   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1533     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1534   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1535     Mat aij;
1536     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1537     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1538     ierr = MatDestroy(aij);CHKERRQ(ierr);
1539   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1540      PetscFunctionReturn(0);
1541   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1542     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1543     for (i=0; i<a->mbs; i++) {
1544       for (j=0; j<bs; j++) {
1545         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1546         for (k=a->i[i]; k<a->i[i+1]; k++) {
1547           for (l=0; l<bs; l++) {
1548 #if defined(PETSC_USE_COMPLEX)
1549             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1550               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1551                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1553               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1554                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1555             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1556               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1557             }
1558 #else
1559             if (a->a[bs2*k + l*bs + j] != 0.0) {
1560               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1561             }
1562 #endif
1563           }
1564         }
1565         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1566       }
1567     }
1568     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1569   } else {
1570     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_NO);CHKERRQ(ierr);
1571     for (i=0; i<a->mbs; i++) {
1572       for (j=0; j<bs; j++) {
1573         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1574         for (k=a->i[i]; k<a->i[i+1]; k++) {
1575           for (l=0; l<bs; l++) {
1576 #if defined(PETSC_USE_COMPLEX)
1577             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1578               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1579                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1580             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1581               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1582                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1583             } else {
1584               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1585             }
1586 #else
1587             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1588 #endif
1589           }
1590         }
1591         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1592       }
1593     }
1594     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_YES);CHKERRQ(ierr);
1595   }
1596   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1597   PetscFunctionReturn(0);
1598 }
1599 
1600 #undef __FUNCT__
1601 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1602 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1603 {
1604   Mat            A = (Mat) Aa;
1605   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1606   PetscErrorCode ierr;
1607   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1608   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1609   MatScalar      *aa;
1610   PetscViewer    viewer;
1611 
1612   PetscFunctionBegin;
1613 
1614   /* still need to add support for contour plot of nonzeros; see MatView_SeqAIJ_Draw_Zoom()*/
1615   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1616 
1617   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1618 
1619   /* loop over matrix elements drawing boxes */
1620   color = PETSC_DRAW_BLUE;
1621   for (i=0,row=0; i<mbs; i++,row+=bs) {
1622     for (j=a->i[i]; j<a->i[i+1]; j++) {
1623       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1624       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1625       aa = a->a + j*bs2;
1626       for (k=0; k<bs; k++) {
1627         for (l=0; l<bs; l++) {
1628           if (PetscRealPart(*aa++) >=  0.) continue;
1629           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1630         }
1631       }
1632     }
1633   }
1634   color = PETSC_DRAW_CYAN;
1635   for (i=0,row=0; i<mbs; i++,row+=bs) {
1636     for (j=a->i[i]; j<a->i[i+1]; j++) {
1637       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639       aa = a->a + j*bs2;
1640       for (k=0; k<bs; k++) {
1641         for (l=0; l<bs; l++) {
1642           if (PetscRealPart(*aa++) != 0.) continue;
1643           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1644         }
1645       }
1646     }
1647   }
1648 
1649   color = PETSC_DRAW_RED;
1650   for (i=0,row=0; i<mbs; i++,row+=bs) {
1651     for (j=a->i[i]; j<a->i[i+1]; j++) {
1652       y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1653       x_l = a->j[j]*bs; x_r = x_l + 1.0;
1654       aa = a->a + j*bs2;
1655       for (k=0; k<bs; k++) {
1656         for (l=0; l<bs; l++) {
1657           if (PetscRealPart(*aa++) <= 0.) continue;
1658           ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1659         }
1660       }
1661     }
1662   }
1663   PetscFunctionReturn(0);
1664 }
1665 
1666 #undef __FUNCT__
1667 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1668 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1669 {
1670   PetscErrorCode ierr;
1671   PetscReal      xl,yl,xr,yr,w,h;
1672   PetscDraw      draw;
1673   PetscTruth     isnull;
1674 
1675   PetscFunctionBegin;
1676 
1677   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1678   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1679 
1680   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1681   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1682   xr += w;    yr += h;  xl = -w;     yl = -h;
1683   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1684   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1685   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1686   PetscFunctionReturn(0);
1687 }
1688 
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatView_SeqBAIJ"
1691 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1692 {
1693   PetscErrorCode ierr;
1694   PetscTruth     iascii,isbinary,isdraw;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
1698   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
1699   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
1700   if (iascii){
1701     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1702   } else if (isbinary) {
1703     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1704   } else if (isdraw) {
1705     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1706   } else {
1707     Mat B;
1708     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1709     ierr = MatView(B,viewer);CHKERRQ(ierr);
1710     ierr = MatDestroy(B);CHKERRQ(ierr);
1711   }
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 
1716 #undef __FUNCT__
1717 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1718 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1719 {
1720   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1721   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1722   PetscInt    *ai = a->i,*ailen = a->ilen;
1723   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1724   MatScalar   *ap,*aa = a->a;
1725 
1726   PetscFunctionBegin;
1727   for (k=0; k<m; k++) { /* loop over rows */
1728     row  = im[k]; brow = row/bs;
1729     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1730     if (row >= A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1731     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1732     nrow = ailen[brow];
1733     for (l=0; l<n; l++) { /* loop over columns */
1734       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1735       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1736       col  = in[l] ;
1737       bcol = col/bs;
1738       cidx = col%bs;
1739       ridx = row%bs;
1740       high = nrow;
1741       low  = 0; /* assume unsorted */
1742       while (high-low > 5) {
1743         t = (low+high)/2;
1744         if (rp[t] > bcol) high = t;
1745         else             low  = t;
1746       }
1747       for (i=low; i<high; i++) {
1748         if (rp[i] > bcol) break;
1749         if (rp[i] == bcol) {
1750           *v++ = ap[bs2*i+bs*cidx+ridx];
1751           goto finished;
1752         }
1753       }
1754       *v++ = 0.0;
1755       finished:;
1756     }
1757   }
1758   PetscFunctionReturn(0);
1759 }
1760 
1761 #define CHUNKSIZE 10
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1764 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1765 {
1766   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1767   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1768   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1769   PetscErrorCode    ierr;
1770   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1771   PetscTruth        roworiented=a->roworiented;
1772   const PetscScalar *value = v;
1773   MatScalar         *ap,*aa = a->a,*bap;
1774 
1775   PetscFunctionBegin;
1776   if (roworiented) {
1777     stepval = (n-1)*bs;
1778   } else {
1779     stepval = (m-1)*bs;
1780   }
1781   for (k=0; k<m; k++) { /* loop over added rows */
1782     row  = im[k];
1783     if (row < 0) continue;
1784 #if defined(PETSC_USE_DEBUG)
1785     if (row >= a->mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1786 #endif
1787     rp   = aj + ai[row];
1788     ap   = aa + bs2*ai[row];
1789     rmax = imax[row];
1790     nrow = ailen[row];
1791     low  = 0;
1792     high = nrow;
1793     for (l=0; l<n; l++) { /* loop over added columns */
1794       if (in[l] < 0) continue;
1795 #if defined(PETSC_USE_DEBUG)
1796       if (in[l] >= a->nbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1797 #endif
1798       col = in[l];
1799       if (roworiented) {
1800         value = v + k*(stepval+bs)*bs + l*bs;
1801       } else {
1802         value = v + l*(stepval+bs)*bs + k*bs;
1803       }
1804       if (col <= lastcol) low = 0; else high = nrow;
1805       lastcol = col;
1806       while (high-low > 7) {
1807         t = (low+high)/2;
1808         if (rp[t] > col) high = t;
1809         else             low  = t;
1810       }
1811       for (i=low; i<high; i++) {
1812         if (rp[i] > col) break;
1813         if (rp[i] == col) {
1814           bap  = ap +  bs2*i;
1815           if (roworiented) {
1816             if (is == ADD_VALUES) {
1817               for (ii=0; ii<bs; ii++,value+=stepval) {
1818                 for (jj=ii; jj<bs2; jj+=bs) {
1819                   bap[jj] += *value++;
1820                 }
1821               }
1822             } else {
1823               for (ii=0; ii<bs; ii++,value+=stepval) {
1824                 for (jj=ii; jj<bs2; jj+=bs) {
1825                   bap[jj] = *value++;
1826                 }
1827               }
1828             }
1829           } else {
1830             if (is == ADD_VALUES) {
1831               for (ii=0; ii<bs; ii++,value+=stepval) {
1832                 for (jj=0; jj<bs; jj++) {
1833                   *bap++ += *value++;
1834                 }
1835               }
1836             } else {
1837               for (ii=0; ii<bs; ii++,value+=stepval) {
1838                 for (jj=0; jj<bs; jj++) {
1839                   *bap++  = *value++;
1840                 }
1841               }
1842             }
1843           }
1844           goto noinsert2;
1845         }
1846       }
1847       if (nonew == 1) goto noinsert2;
1848       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1849       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1850       N = nrow++ - 1; high++;
1851       /* shift up all the later entries in this row */
1852       for (ii=N; ii>=i; ii--) {
1853         rp[ii+1] = rp[ii];
1854         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1855       }
1856       if (N >= i) {
1857         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1858       }
1859       rp[i] = col;
1860       bap   = ap +  bs2*i;
1861       if (roworiented) {
1862         for (ii=0; ii<bs; ii++,value+=stepval) {
1863           for (jj=ii; jj<bs2; jj+=bs) {
1864             bap[jj] = *value++;
1865           }
1866         }
1867       } else {
1868         for (ii=0; ii<bs; ii++,value+=stepval) {
1869           for (jj=0; jj<bs; jj++) {
1870             *bap++  = *value++;
1871           }
1872         }
1873       }
1874       noinsert2:;
1875       low = i;
1876     }
1877     ailen[row] = nrow;
1878   }
1879   PetscFunctionReturn(0);
1880 }
1881 
1882 #undef __FUNCT__
1883 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1884 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1885 {
1886   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1887   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1888   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
1889   PetscErrorCode ierr;
1890   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
1891   MatScalar      *aa = a->a,*ap;
1892   PetscReal      ratio=0.6;
1893 
1894   PetscFunctionBegin;
1895   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1896 
1897   if (m) rmax = ailen[0];
1898   for (i=1; i<mbs; i++) {
1899     /* move each row back by the amount of empty slots (fshift) before it*/
1900     fshift += imax[i-1] - ailen[i-1];
1901     rmax   = PetscMax(rmax,ailen[i]);
1902     if (fshift) {
1903       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1904       N = ailen[i];
1905       for (j=0; j<N; j++) {
1906         ip[j-fshift] = ip[j];
1907         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1908       }
1909     }
1910     ai[i] = ai[i-1] + ailen[i-1];
1911   }
1912   if (mbs) {
1913     fshift += imax[mbs-1] - ailen[mbs-1];
1914     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1915   }
1916   /* reset ilen and imax for each row */
1917   for (i=0; i<mbs; i++) {
1918     ailen[i] = imax[i] = ai[i+1] - ai[i];
1919   }
1920   a->nz = ai[mbs];
1921 
1922   /* diagonals may have moved, so kill the diagonal pointers */
1923   a->idiagvalid = PETSC_FALSE;
1924   if (fshift && a->diag) {
1925     ierr = PetscFree(a->diag);CHKERRQ(ierr);
1926     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1927     a->diag = 0;
1928   }
1929   if (fshift && a->nounused == -1) {
1930     SETERRQ4(PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1931   }
1932   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1933   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1934   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1935   a->reallocs          = 0;
1936   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
1937 
1938   /* check for zero rows. If found a large number of zero rows, use CompressedRow functions */
1939   if (a->compressedrow.use){
1940     ierr = Mat_CheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1941   }
1942 
1943   A->same_nonzero = PETSC_TRUE;
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 /*
1948    This function returns an array of flags which indicate the locations of contiguous
1949    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1950    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1951    Assume: sizes should be long enough to hold all the values.
1952 */
1953 #undef __FUNCT__
1954 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1955 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1956 {
1957   PetscInt   i,j,k,row;
1958   PetscTruth flg;
1959 
1960   PetscFunctionBegin;
1961   for (i=0,j=0; i<n; j++) {
1962     row = idx[i];
1963     if (row%bs!=0) { /* Not the begining of a block */
1964       sizes[j] = 1;
1965       i++;
1966     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1967       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1968       i++;
1969     } else { /* Begining of the block, so check if the complete block exists */
1970       flg = PETSC_TRUE;
1971       for (k=1; k<bs; k++) {
1972         if (row+k != idx[i+k]) { /* break in the block */
1973           flg = PETSC_FALSE;
1974           break;
1975         }
1976       }
1977       if (flg) { /* No break in the bs */
1978         sizes[j] = bs;
1979         i+= bs;
1980       } else {
1981         sizes[j] = 1;
1982         i++;
1983       }
1984     }
1985   }
1986   *bs_max = j;
1987   PetscFunctionReturn(0);
1988 }
1989 
1990 #undef __FUNCT__
1991 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1992 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag)
1993 {
1994   Mat_SeqBAIJ    *baij=(Mat_SeqBAIJ*)A->data;
1995   PetscErrorCode ierr;
1996   PetscInt       i,j,k,count,*rows;
1997   PetscInt       bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1998   PetscScalar    zero = 0.0;
1999   MatScalar      *aa;
2000 
2001   PetscFunctionBegin;
2002   /* Make a copy of the IS and  sort it */
2003   /* allocate memory for rows,sizes */
2004   ierr  = PetscMalloc((3*is_n+1)*sizeof(PetscInt),&rows);CHKERRQ(ierr);
2005   sizes = rows + is_n;
2006 
2007   /* copy IS values to rows, and sort them */
2008   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2009   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2010   if (baij->keepnonzeropattern) {
2011     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2012     bs_max = is_n;
2013     A->same_nonzero = PETSC_TRUE;
2014   } else {
2015     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2016     A->same_nonzero = PETSC_FALSE;
2017   }
2018 
2019   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2020     row   = rows[j];
2021     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2022     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2023     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2024     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2025       if (diag != 0.0) {
2026         if (baij->ilen[row/bs] > 0) {
2027           baij->ilen[row/bs]       = 1;
2028           baij->j[baij->i[row/bs]] = row/bs;
2029           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2030         }
2031         /* Now insert all the diagonal values for this bs */
2032         for (k=0; k<bs; k++) {
2033           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2034         }
2035       } else { /* (diag == 0.0) */
2036         baij->ilen[row/bs] = 0;
2037       } /* end (diag == 0.0) */
2038     } else { /* (sizes[i] != bs) */
2039 #if defined (PETSC_USE_DEBUG)
2040       if (sizes[i] != 1) SETERRQ(PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2041 #endif
2042       for (k=0; k<count; k++) {
2043         aa[0] =  zero;
2044         aa    += bs;
2045       }
2046       if (diag != 0.0) {
2047         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2048       }
2049     }
2050   }
2051 
2052   ierr = PetscFree(rows);CHKERRQ(ierr);
2053   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2054   PetscFunctionReturn(0);
2055 }
2056 
2057 #undef __FUNCT__
2058 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2059 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2060 {
2061   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2062   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2063   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2064   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2065   PetscErrorCode ierr;
2066   PetscInt       ridx,cidx,bs2=a->bs2;
2067   PetscTruth     roworiented=a->roworiented;
2068   MatScalar      *ap,value,*aa=a->a,*bap;
2069 
2070   PetscFunctionBegin;
2071   for (k=0; k<m; k++) { /* loop over added rows */
2072     row  = im[k];
2073     brow = row/bs;
2074     if (row < 0) continue;
2075 #if defined(PETSC_USE_DEBUG)
2076     if (row >= A->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2077 #endif
2078     rp   = aj + ai[brow];
2079     ap   = aa + bs2*ai[brow];
2080     rmax = imax[brow];
2081     nrow = ailen[brow];
2082     low  = 0;
2083     high = nrow;
2084     for (l=0; l<n; l++) { /* loop over added columns */
2085       if (in[l] < 0) continue;
2086 #if defined(PETSC_USE_DEBUG)
2087       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2088 #endif
2089       col = in[l]; bcol = col/bs;
2090       ridx = row % bs; cidx = col % bs;
2091       if (roworiented) {
2092         value = v[l + k*n];
2093       } else {
2094         value = v[k + l*m];
2095       }
2096       if (col <= lastcol) low = 0; else high = nrow;
2097       lastcol = col;
2098       while (high-low > 7) {
2099         t = (low+high)/2;
2100         if (rp[t] > bcol) high = t;
2101         else              low  = t;
2102       }
2103       for (i=low; i<high; i++) {
2104         if (rp[i] > bcol) break;
2105         if (rp[i] == bcol) {
2106           bap  = ap +  bs2*i + bs*cidx + ridx;
2107           if (is == ADD_VALUES) *bap += value;
2108           else                  *bap  = value;
2109           goto noinsert1;
2110         }
2111       }
2112       if (nonew == 1) goto noinsert1;
2113       if (nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2114       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2115       N = nrow++ - 1; high++;
2116       /* shift up all the later entries in this row */
2117       for (ii=N; ii>=i; ii--) {
2118         rp[ii+1] = rp[ii];
2119         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2120       }
2121       if (N>=i) {
2122         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2123       }
2124       rp[i]                      = bcol;
2125       ap[bs2*i + bs*cidx + ridx] = value;
2126       a->nz++;
2127       noinsert1:;
2128       low = i;
2129     }
2130     ailen[brow] = nrow;
2131   }
2132   A->same_nonzero = PETSC_FALSE;
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 EXTERN PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat,PetscTruth);
2137 
2138 #undef __FUNCT__
2139 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2140 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2141 {
2142   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2143   Mat            outA;
2144   PetscErrorCode ierr;
2145   PetscTruth     row_identity,col_identity;
2146 
2147   PetscFunctionBegin;
2148   if (info->levels != 0) SETERRQ(PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2149   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2150   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2151   if (!row_identity || !col_identity) {
2152     SETERRQ(PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2153   }
2154 
2155   outA          = inA;
2156   inA->factor   = MAT_FACTOR_LU;
2157 
2158   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2159 
2160   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2161   if (a->row) { ierr = ISDestroy(a->row);CHKERRQ(ierr); }
2162   a->row = row;
2163   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2164   if (a->col) { ierr = ISDestroy(a->col);CHKERRQ(ierr); }
2165   a->col = col;
2166 
2167   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2168   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2169   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2170 
2171   ierr = MatSeqBAIJSetNumericFactorization(inA,(PetscTruth)(row_identity && col_identity));CHKERRQ(ierr);
2172   if (!a->solve_work) {
2173     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2174     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2175   }
2176   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2177 
2178   PetscFunctionReturn(0);
2179 }
2180 
2181 EXTERN_C_BEGIN
2182 #undef __FUNCT__
2183 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2184 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2185 {
2186   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2187   PetscInt    i,nz,mbs;
2188 
2189   PetscFunctionBegin;
2190   nz  = baij->maxnz/baij->bs2;
2191   mbs = baij->mbs;
2192   for (i=0; i<nz; i++) {
2193     baij->j[i] = indices[i];
2194   }
2195   baij->nz = nz;
2196   for (i=0; i<mbs; i++) {
2197     baij->ilen[i] = baij->imax[i];
2198   }
2199   PetscFunctionReturn(0);
2200 }
2201 EXTERN_C_END
2202 
2203 #undef __FUNCT__
2204 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2205 /*@
2206     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2207        in the matrix.
2208 
2209   Input Parameters:
2210 +  mat - the SeqBAIJ matrix
2211 -  indices - the column indices
2212 
2213   Level: advanced
2214 
2215   Notes:
2216     This can be called if you have precomputed the nonzero structure of the
2217   matrix and want to provide it to the matrix object to improve the performance
2218   of the MatSetValues() operation.
2219 
2220     You MUST have set the correct numbers of nonzeros per row in the call to
2221   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2222 
2223     MUST be called before any calls to MatSetValues();
2224 
2225 @*/
2226 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2227 {
2228   PetscErrorCode ierr,(*f)(Mat,PetscInt *);
2229 
2230   PetscFunctionBegin;
2231   PetscValidHeaderSpecific(mat,MAT_COOKIE,1);
2232   PetscValidPointer(indices,2);
2233   ierr = PetscObjectQueryFunction((PetscObject)mat,"MatSeqBAIJSetColumnIndices_C",(void (**)(void))&f);CHKERRQ(ierr);
2234   if (f) {
2235     ierr = (*f)(mat,indices);CHKERRQ(ierr);
2236   } else {
2237     SETERRQ(PETSC_ERR_ARG_WRONG,"Wrong type of matrix to set column indices");
2238   }
2239   PetscFunctionReturn(0);
2240 }
2241 
2242 #undef __FUNCT__
2243 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2244 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2245 {
2246   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2247   PetscErrorCode ierr;
2248   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2249   PetscReal      atmp;
2250   PetscScalar    *x,zero = 0.0;
2251   MatScalar      *aa;
2252   PetscInt       ncols,brow,krow,kcol;
2253 
2254   PetscFunctionBegin;
2255   if (A->factor) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2256   bs   = A->rmap->bs;
2257   aa   = a->a;
2258   ai   = a->i;
2259   aj   = a->j;
2260   mbs  = a->mbs;
2261 
2262   ierr = VecSet(v,zero);CHKERRQ(ierr);
2263   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2264   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2265   if (n != A->rmap->N) SETERRQ(PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2266   for (i=0; i<mbs; i++) {
2267     ncols = ai[1] - ai[0]; ai++;
2268     brow  = bs*i;
2269     for (j=0; j<ncols; j++){
2270       for (kcol=0; kcol<bs; kcol++){
2271         for (krow=0; krow<bs; krow++){
2272           atmp = PetscAbsScalar(*aa);aa++;
2273           row = brow + krow;    /* row index */
2274           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2275           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2276         }
2277       }
2278       aj++;
2279     }
2280   }
2281   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2282   PetscFunctionReturn(0);
2283 }
2284 
2285 #undef __FUNCT__
2286 #define __FUNCT__ "MatCopy_SeqBAIJ"
2287 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2288 {
2289   PetscErrorCode ierr;
2290 
2291   PetscFunctionBegin;
2292   /* If the two matrices have the same copy implementation, use fast copy. */
2293   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2294     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2295     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2296 
2297     if (a->i[A->rmap->N] != b->i[B->rmap->N]) {
2298       SETERRQ(PETSC_ERR_ARG_INCOMP,"Number of nonzeros in two matrices are different");
2299     }
2300     ierr = PetscMemcpy(b->a,a->a,(a->i[A->rmap->N])*sizeof(PetscScalar));CHKERRQ(ierr);
2301   } else {
2302     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2303   }
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatSetUpPreallocation_SeqBAIJ"
2309 PetscErrorCode MatSetUpPreallocation_SeqBAIJ(Mat A)
2310 {
2311   PetscErrorCode ierr;
2312 
2313   PetscFunctionBegin;
2314   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0);CHKERRQ(ierr);
2315   PetscFunctionReturn(0);
2316 }
2317 
2318 #undef __FUNCT__
2319 #define __FUNCT__ "MatGetArray_SeqBAIJ"
2320 PetscErrorCode MatGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2321 {
2322   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2323   PetscFunctionBegin;
2324   *array = a->a;
2325   PetscFunctionReturn(0);
2326 }
2327 
2328 #undef __FUNCT__
2329 #define __FUNCT__ "MatRestoreArray_SeqBAIJ"
2330 PetscErrorCode MatRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2331 {
2332   PetscFunctionBegin;
2333   PetscFunctionReturn(0);
2334 }
2335 
2336 #include "petscblaslapack.h"
2337 #undef __FUNCT__
2338 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2339 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2340 {
2341   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2342   PetscErrorCode ierr;
2343   PetscInt       i,bs=Y->rmap->bs,j,bs2;
2344   PetscBLASInt   one=1,bnz = PetscBLASIntCast(x->nz);
2345 
2346   PetscFunctionBegin;
2347   if (str == SAME_NONZERO_PATTERN) {
2348     PetscScalar alpha = a;
2349     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2350   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2351     if (y->xtoy && y->XtoY != X) {
2352       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2353       ierr = MatDestroy(y->XtoY);CHKERRQ(ierr);
2354     }
2355     if (!y->xtoy) { /* get xtoy */
2356       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2357       y->XtoY = X;
2358       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2359     }
2360     bs2 = bs*bs;
2361     for (i=0; i<x->nz; i++) {
2362       j = 0;
2363       while (j < bs2){
2364         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2365         j++;
2366       }
2367     }
2368     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2369   } else {
2370     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2371   }
2372   PetscFunctionReturn(0);
2373 }
2374 
2375 #undef __FUNCT__
2376 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2377 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2378 {
2379   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2380   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2381   MatScalar      *aa = a->a;
2382 
2383   PetscFunctionBegin;
2384   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 #undef __FUNCT__
2389 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2390 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2391 {
2392   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2393   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2394   MatScalar      *aa = a->a;
2395 
2396   PetscFunctionBegin;
2397   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2398   PetscFunctionReturn(0);
2399 }
2400 
2401 
2402 /* -------------------------------------------------------------------*/
2403 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2404        MatGetRow_SeqBAIJ,
2405        MatRestoreRow_SeqBAIJ,
2406        MatMult_SeqBAIJ_N,
2407 /* 4*/ MatMultAdd_SeqBAIJ_N,
2408        MatMultTranspose_SeqBAIJ,
2409        MatMultTransposeAdd_SeqBAIJ,
2410        0,
2411        0,
2412        0,
2413 /*10*/ 0,
2414        MatLUFactor_SeqBAIJ,
2415        0,
2416        0,
2417        MatTranspose_SeqBAIJ,
2418 /*15*/ MatGetInfo_SeqBAIJ,
2419        MatEqual_SeqBAIJ,
2420        MatGetDiagonal_SeqBAIJ,
2421        MatDiagonalScale_SeqBAIJ,
2422        MatNorm_SeqBAIJ,
2423 /*20*/ 0,
2424        MatAssemblyEnd_SeqBAIJ,
2425        MatSetOption_SeqBAIJ,
2426        MatZeroEntries_SeqBAIJ,
2427 /*24*/ MatZeroRows_SeqBAIJ,
2428        0,
2429        0,
2430        0,
2431        0,
2432 /*29*/ MatSetUpPreallocation_SeqBAIJ,
2433        0,
2434        0,
2435        MatGetArray_SeqBAIJ,
2436        MatRestoreArray_SeqBAIJ,
2437 /*34*/ MatDuplicate_SeqBAIJ,
2438        0,
2439        0,
2440        MatILUFactor_SeqBAIJ,
2441        0,
2442 /*39*/ MatAXPY_SeqBAIJ,
2443        MatGetSubMatrices_SeqBAIJ,
2444        MatIncreaseOverlap_SeqBAIJ,
2445        MatGetValues_SeqBAIJ,
2446        MatCopy_SeqBAIJ,
2447 /*44*/ 0,
2448        MatScale_SeqBAIJ,
2449        0,
2450        0,
2451        MatILUDTFactor_SeqBAIJ,
2452 /*49*/ 0,
2453        MatGetRowIJ_SeqBAIJ,
2454        MatRestoreRowIJ_SeqBAIJ,
2455        0,
2456        0,
2457 /*54*/ 0,
2458        0,
2459        0,
2460        0,
2461        MatSetValuesBlocked_SeqBAIJ,
2462 /*59*/ MatGetSubMatrix_SeqBAIJ,
2463        MatDestroy_SeqBAIJ,
2464        MatView_SeqBAIJ,
2465        0,
2466        0,
2467 /*64*/ 0,
2468        0,
2469        0,
2470        0,
2471        0,
2472 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2473        0,
2474        MatConvert_Basic,
2475        0,
2476        0,
2477 /*74*/ 0,
2478        0,
2479        0,
2480        0,
2481        0,
2482 /*79*/ 0,
2483        0,
2484        0,
2485        0,
2486        MatLoad_SeqBAIJ,
2487 /*84*/ 0,
2488        0,
2489        0,
2490        0,
2491        0,
2492 /*89*/ 0,
2493        0,
2494        0,
2495        0,
2496        0,
2497 /*94*/ 0,
2498        0,
2499        0,
2500        0,
2501        0,
2502 /*99*/0,
2503        0,
2504        0,
2505        0,
2506        0,
2507 /*104*/0,
2508        MatRealPart_SeqBAIJ,
2509        MatImaginaryPart_SeqBAIJ,
2510        0,
2511        0,
2512 /*109*/0,
2513        0,
2514        0,
2515        0,
2516        MatMissingDiagonal_SeqBAIJ
2517 /*114*/
2518 };
2519 
2520 EXTERN_C_BEGIN
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2523 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_SeqBAIJ(Mat mat)
2524 {
2525   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2526   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2527   PetscErrorCode ierr;
2528 
2529   PetscFunctionBegin;
2530   if (aij->nonew != 1) {
2531     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2532   }
2533 
2534   /* allocate space for values if not already there */
2535   if (!aij->saved_values) {
2536     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
2537   }
2538 
2539   /* copy values over */
2540   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 EXTERN_C_END
2544 
2545 EXTERN_C_BEGIN
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2548 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_SeqBAIJ(Mat mat)
2549 {
2550   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
2551   PetscErrorCode ierr;
2552   PetscInt       nz = aij->i[mat->rmap->N]*mat->rmap->bs*aij->bs2;
2553 
2554   PetscFunctionBegin;
2555   if (aij->nonew != 1) {
2556     SETERRQ(PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2557   }
2558   if (!aij->saved_values) {
2559     SETERRQ(PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2560   }
2561 
2562   /* copy values over */
2563   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2564   PetscFunctionReturn(0);
2565 }
2566 EXTERN_C_END
2567 
2568 EXTERN_C_BEGIN
2569 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2570 extern PetscErrorCode PETSCMAT_DLLEXPORT MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2571 EXTERN_C_END
2572 
2573 EXTERN_C_BEGIN
2574 #undef __FUNCT__
2575 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2576 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2577 {
2578   Mat_SeqBAIJ    *b;
2579   PetscErrorCode ierr;
2580   PetscInt       i,mbs,nbs,bs2,newbs = PetscAbs(bs);
2581   PetscTruth     flg,skipallocation = PETSC_FALSE;
2582 
2583   PetscFunctionBegin;
2584 
2585   if (nz == MAT_SKIP_ALLOCATION) {
2586     skipallocation = PETSC_TRUE;
2587     nz             = 0;
2588   }
2589 
2590   if (bs < 0) {
2591     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Block options for SEQBAIJ matrix 1","Mat");CHKERRQ(ierr);
2592       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatSeqBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
2593     ierr = PetscOptionsEnd();CHKERRQ(ierr);
2594     bs   = PetscAbs(bs);
2595   }
2596   if (nnz && newbs != bs) {
2597     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting nnz");
2598   }
2599   bs   = newbs;
2600 
2601   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2602   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2603   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2604   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2605 
2606   B->preallocated = PETSC_TRUE;
2607 
2608   mbs  = B->rmap->n/bs;
2609   nbs  = B->cmap->n/bs;
2610   bs2  = bs*bs;
2611 
2612   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) {
2613     SETERRQ3(PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2614   }
2615 
2616   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2617   if (nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2618   if (nnz) {
2619     for (i=0; i<mbs; i++) {
2620       if (nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2621       if (nnz[i] > nbs) SETERRQ3(PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2622     }
2623   }
2624 
2625   b       = (Mat_SeqBAIJ*)B->data;
2626   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2627     ierr = PetscOptionsTruth("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
2628   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2629 
2630   if (!flg) {
2631     switch (bs) {
2632     case 1:
2633       B->ops->mult            = MatMult_SeqBAIJ_1;
2634       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
2635       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_1;
2636       break;
2637     case 2:
2638       B->ops->mult            = MatMult_SeqBAIJ_2;
2639       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
2640       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_2;
2641       break;
2642     case 3:
2643       B->ops->mult            = MatMult_SeqBAIJ_3;
2644       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
2645       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_3;
2646       break;
2647     case 4:
2648       B->ops->mult            = MatMult_SeqBAIJ_4;
2649       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
2650       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_4;
2651       break;
2652     case 5:
2653       B->ops->mult            = MatMult_SeqBAIJ_5;
2654       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
2655       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_5;
2656       break;
2657     case 6:
2658       B->ops->mult            = MatMult_SeqBAIJ_6;
2659       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
2660       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_6;
2661       break;
2662     case 7:
2663       B->ops->mult            = MatMult_SeqBAIJ_7;
2664       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
2665       B->ops->pbrelax         = MatPBRelax_SeqBAIJ_7;
2666       break;
2667     default:
2668       B->ops->mult            = MatMult_SeqBAIJ_N;
2669       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
2670       break;
2671     }
2672   }
2673   B->rmap->bs      = bs;
2674   b->mbs     = mbs;
2675   b->nbs     = nbs;
2676   if (!skipallocation) {
2677     if (!b->imax) {
2678       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
2679     }
2680     /* b->ilen will count nonzeros in each block row so far. */
2681     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
2682     if (!nnz) {
2683       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2684       else if (nz <= 0)        nz = 1;
2685       for (i=0; i<mbs; i++) b->imax[i] = nz;
2686       nz = nz*mbs;
2687     } else {
2688       nz = 0;
2689       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2690     }
2691 
2692     /* allocate the matrix space */
2693     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2694     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
2695     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2696     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2697     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2698     b->singlemalloc = PETSC_TRUE;
2699     b->i[0] = 0;
2700     for (i=1; i<mbs+1; i++) {
2701       b->i[i] = b->i[i-1] + b->imax[i-1];
2702     }
2703     b->free_a     = PETSC_TRUE;
2704     b->free_ij    = PETSC_TRUE;
2705   } else {
2706     b->free_a     = PETSC_FALSE;
2707     b->free_ij    = PETSC_FALSE;
2708   }
2709 
2710   B->rmap->bs          = bs;
2711   b->bs2              = bs2;
2712   b->mbs              = mbs;
2713   b->nz               = 0;
2714   b->maxnz            = nz*bs2;
2715   B->info.nz_unneeded = (PetscReal)b->maxnz;
2716   PetscFunctionReturn(0);
2717 }
2718 EXTERN_C_END
2719 
2720 EXTERN_C_BEGIN
2721 #undef __FUNCT__
2722 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2723 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2724 {
2725   PetscInt       i,m,nz,nz_max=0,*nnz;
2726   PetscScalar    *values=0;
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730 
2731   if (bs < 1) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2732 
2733   ierr = PetscMapSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2734   ierr = PetscMapSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2735   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
2736   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
2737   m = B->rmap->n/bs;
2738 
2739   if (ii[0] != 0) { SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
2740   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
2741   for(i=0; i<m; i++) {
2742     nz = ii[i+1]- ii[i];
2743     if (nz < 0) { SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
2744     nz_max = PetscMax(nz_max, nz);
2745     nnz[i] = nz;
2746   }
2747   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2748   ierr = PetscFree(nnz);CHKERRQ(ierr);
2749 
2750   values = (PetscScalar*)V;
2751   if (!values) {
2752     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
2753     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
2754   }
2755   for (i=0; i<m; i++) {
2756     PetscInt          ncols  = ii[i+1] - ii[i];
2757     const PetscInt    *icols = jj + ii[i];
2758     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2759     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2760   }
2761   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2762   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2763   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2764 
2765   PetscFunctionReturn(0);
2766 }
2767 EXTERN_C_END
2768 
2769 
2770 EXTERN_C_BEGIN
2771 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2772 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
2773 EXTERN_C_END
2774 
2775 /*MC
2776    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2777    block sparse compressed row format.
2778 
2779    Options Database Keys:
2780 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2781 
2782   Level: beginner
2783 
2784 .seealso: MatCreateSeqBAIJ()
2785 M*/
2786 
2787 
2788 EXTERN_C_BEGIN
2789 #undef __FUNCT__
2790 #define __FUNCT__ "MatCreate_SeqBAIJ"
2791 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SeqBAIJ(Mat B)
2792 {
2793   PetscErrorCode ierr;
2794   PetscMPIInt    size;
2795   Mat_SeqBAIJ    *b;
2796 
2797   PetscFunctionBegin;
2798   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
2799   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2800 
2801   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
2802   B->data = (void*)b;
2803   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2804   B->mapping               = 0;
2805   b->row                   = 0;
2806   b->col                   = 0;
2807   b->icol                  = 0;
2808   b->reallocs              = 0;
2809   b->saved_values          = 0;
2810 
2811   b->roworiented           = PETSC_TRUE;
2812   b->nonew                 = 0;
2813   b->diag                  = 0;
2814   b->solve_work            = 0;
2815   b->mult_work             = 0;
2816   B->spptr                 = 0;
2817   B->info.nz_unneeded      = (PetscReal)b->maxnz;
2818   b->keepnonzeropattern    = PETSC_FALSE;
2819   b->xtoy                  = 0;
2820   b->XtoY                  = 0;
2821   b->compressedrow.use     = PETSC_FALSE;
2822   b->compressedrow.nrows   = 0;
2823   b->compressedrow.i       = PETSC_NULL;
2824   b->compressedrow.rindex  = PETSC_NULL;
2825   b->compressedrow.checked = PETSC_FALSE;
2826   B->same_nonzero          = PETSC_FALSE;
2827 
2828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_seqbaij_petsc_C",
2829                                      "MatGetFactorAvailable_seqbaij_petsc",
2830                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
2831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_seqbaij_petsc_C",
2832                                      "MatGetFactor_seqbaij_petsc",
2833                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
2834   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJInvertBlockDiagonal_C",
2835                                      "MatInvertBlockDiagonal_SeqBAIJ",
2836                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
2837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
2838                                      "MatStoreValues_SeqBAIJ",
2839                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
2840   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
2841                                      "MatRetrieveValues_SeqBAIJ",
2842                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
2843   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
2844                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
2845                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
2846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
2847                                      "MatConvert_SeqBAIJ_SeqAIJ",
2848                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
2849   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
2850                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
2851                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
2852   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
2853                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
2854                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
2855   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
2856                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
2857                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
2858   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
2859   PetscFunctionReturn(0);
2860 }
2861 EXTERN_C_END
2862 
2863 #undef __FUNCT__
2864 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
2865 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues)
2866 {
2867   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
2868   PetscErrorCode ierr;
2869   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
2870 
2871   PetscFunctionBegin;
2872   if (a->i[mbs] != nz) SETERRQ(PETSC_ERR_PLIB,"Corrupt matrix");
2873 
2874   ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
2875   for (i=0; i<mbs; i++) {
2876     c->imax[i] = a->imax[i];
2877     c->ilen[i] = a->ilen[i];
2878   }
2879 
2880   /* allocate the matrix space */
2881   ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
2882   c->singlemalloc = PETSC_TRUE;
2883   ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2884   if (mbs > 0) {
2885     ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2886     if (cpvalues == MAT_COPY_VALUES) {
2887       ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2888     } else {
2889       ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
2890     }
2891   }
2892   c->roworiented = a->roworiented;
2893   c->nonew       = a->nonew;
2894   ierr = PetscMapCopy(((PetscObject)A)->comm,A->rmap,C->rmap);CHKERRQ(ierr);
2895   ierr = PetscMapCopy(((PetscObject)A)->comm,A->cmap,C->cmap);CHKERRQ(ierr);
2896   c->bs2         = a->bs2;
2897   c->mbs         = a->mbs;
2898   c->nbs         = a->nbs;
2899 
2900   if (a->diag) {
2901     ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
2902     ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2903     for (i=0; i<mbs; i++) {
2904       c->diag[i] = a->diag[i];
2905     }
2906   } else c->diag        = 0;
2907   c->nz                 = a->nz;
2908   c->maxnz              = a->maxnz;
2909   c->solve_work         = 0;
2910   c->mult_work          = 0;
2911   c->free_a             = PETSC_TRUE;
2912   c->free_ij            = PETSC_TRUE;
2913   C->preallocated       = PETSC_TRUE;
2914   C->assembled          = PETSC_TRUE;
2915 
2916   c->compressedrow.use     = a->compressedrow.use;
2917   c->compressedrow.nrows   = a->compressedrow.nrows;
2918   c->compressedrow.checked = a->compressedrow.checked;
2919   if ( a->compressedrow.checked && a->compressedrow.use){
2920     i = a->compressedrow.nrows;
2921     ierr = PetscMalloc((2*i+1)*sizeof(PetscInt),&c->compressedrow.i);CHKERRQ(ierr);
2922     c->compressedrow.rindex = c->compressedrow.i + i + 1;
2923     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
2924     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
2925   } else {
2926     c->compressedrow.use    = PETSC_FALSE;
2927     c->compressedrow.i      = PETSC_NULL;
2928     c->compressedrow.rindex = PETSC_NULL;
2929   }
2930   C->same_nonzero = A->same_nonzero;
2931   ierr = PetscFListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
2932   PetscFunctionReturn(0);
2933 }
2934 
2935 #undef __FUNCT__
2936 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
2937 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
2938 {
2939     PetscErrorCode ierr;
2940 
2941   PetscFunctionBegin;
2942   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
2943   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
2944   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
2945   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues);
2946   PetscFunctionReturn(0);
2947 }
2948 
2949 #undef __FUNCT__
2950 #define __FUNCT__ "MatLoad_SeqBAIJ"
2951 PetscErrorCode MatLoad_SeqBAIJ(PetscViewer viewer, const MatType type,Mat *A)
2952 {
2953   Mat_SeqBAIJ    *a;
2954   Mat            B;
2955   PetscErrorCode ierr;
2956   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
2957   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
2958   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows;
2959   PetscInt       *masked,nmask,tmp,bs2,ishift;
2960   PetscMPIInt    size;
2961   int            fd;
2962   PetscScalar    *aa;
2963   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2964 
2965   PetscFunctionBegin;
2966   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
2967     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2968   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2969   bs2  = bs*bs;
2970 
2971   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2972   if (size > 1) SETERRQ(PETSC_ERR_ARG_WRONG,"view must have one processor");
2973   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2974   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
2975   if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
2976   M = header[1]; N = header[2]; nz = header[3];
2977 
2978   if (header[3] < 0) {
2979     SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
2980   }
2981 
2982   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2983 
2984   /*
2985      This code adds extra rows to make sure the number of rows is
2986     divisible by the blocksize
2987   */
2988   mbs        = M/bs;
2989   extra_rows = bs - M + bs*(mbs);
2990   if (extra_rows == bs) extra_rows = 0;
2991   else                  mbs++;
2992   if (extra_rows) {
2993     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2994   }
2995 
2996   /* read in row lengths */
2997   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2998   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2999   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3000 
3001   /* read in column indices */
3002   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3003   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3004   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3005 
3006   /* loop over row lengths determining block row lengths */
3007   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3008   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3009   ierr     = PetscMalloc(2*mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
3010   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3011   masked   = mask + mbs;
3012   rowcount = 0; nzcount = 0;
3013   for (i=0; i<mbs; i++) {
3014     nmask = 0;
3015     for (j=0; j<bs; j++) {
3016       kmax = rowlengths[rowcount];
3017       for (k=0; k<kmax; k++) {
3018         tmp = jj[nzcount++]/bs;
3019         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3020       }
3021       rowcount++;
3022     }
3023     browlengths[i] += nmask;
3024     /* zero out the mask elements we set */
3025     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3026   }
3027 
3028   /* create our matrix */
3029   ierr = MatCreate(comm,&B);
3030   ierr = MatSetSizes(B,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);
3031   ierr = MatSetType(B,type);CHKERRQ(ierr);
3032   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,0,browlengths);CHKERRQ(ierr);
3033   a = (Mat_SeqBAIJ*)B->data;
3034 
3035   /* set matrix "i" values */
3036   a->i[0] = 0;
3037   for (i=1; i<= mbs; i++) {
3038     a->i[i]      = a->i[i-1] + browlengths[i-1];
3039     a->ilen[i-1] = browlengths[i-1];
3040   }
3041   a->nz         = 0;
3042   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3043 
3044   /* read in nonzero values */
3045   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3046   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3047   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3048 
3049   /* set "a" and "j" values into matrix */
3050   nzcount = 0; jcount = 0;
3051   for (i=0; i<mbs; i++) {
3052     nzcountb = nzcount;
3053     nmask    = 0;
3054     for (j=0; j<bs; j++) {
3055       kmax = rowlengths[i*bs+j];
3056       for (k=0; k<kmax; k++) {
3057         tmp = jj[nzcount++]/bs;
3058 	if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3059       }
3060     }
3061     /* sort the masked values */
3062     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3063 
3064     /* set "j" values into matrix */
3065     maskcount = 1;
3066     for (j=0; j<nmask; j++) {
3067       a->j[jcount++]  = masked[j];
3068       mask[masked[j]] = maskcount++;
3069     }
3070     /* set "a" values into matrix */
3071     ishift = bs2*a->i[i];
3072     for (j=0; j<bs; j++) {
3073       kmax = rowlengths[i*bs+j];
3074       for (k=0; k<kmax; k++) {
3075         tmp       = jj[nzcountb]/bs ;
3076         block     = mask[tmp] - 1;
3077         point     = jj[nzcountb] - bs*tmp;
3078         idx       = ishift + bs2*block + j + bs*point;
3079         a->a[idx] = (MatScalar)aa[nzcountb++];
3080       }
3081     }
3082     /* zero out the mask elements we set */
3083     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3084   }
3085   if (jcount != a->nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3086 
3087   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3088   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3089   ierr = PetscFree(aa);CHKERRQ(ierr);
3090   ierr = PetscFree(jj);CHKERRQ(ierr);
3091   ierr = PetscFree(mask);CHKERRQ(ierr);
3092 
3093   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3094   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3095   ierr = MatView_Private(B);CHKERRQ(ierr);
3096 
3097   *A = B;
3098   PetscFunctionReturn(0);
3099 }
3100 
3101 #undef __FUNCT__
3102 #define __FUNCT__ "MatCreateSeqBAIJ"
3103 /*@C
3104    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3105    compressed row) format.  For good matrix assembly performance the
3106    user should preallocate the matrix storage by setting the parameter nz
3107    (or the array nnz).  By setting these parameters accurately, performance
3108    during matrix assembly can be increased by more than a factor of 50.
3109 
3110    Collective on MPI_Comm
3111 
3112    Input Parameters:
3113 +  comm - MPI communicator, set to PETSC_COMM_SELF
3114 .  bs - size of block
3115 .  m - number of rows
3116 .  n - number of columns
3117 .  nz - number of nonzero blocks  per block row (same for all rows)
3118 -  nnz - array containing the number of nonzero blocks in the various block rows
3119          (possibly different for each block row) or PETSC_NULL
3120 
3121    Output Parameter:
3122 .  A - the matrix
3123 
3124    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3125    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3126    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3127 
3128    Options Database Keys:
3129 .   -mat_no_unroll - uses code that does not unroll the loops in the
3130                      block calculations (much slower)
3131 .    -mat_block_size - size of the blocks to use
3132 
3133    Level: intermediate
3134 
3135    Notes:
3136    The number of rows and columns must be divisible by blocksize.
3137 
3138    If the nnz parameter is given then the nz parameter is ignored
3139 
3140    A nonzero block is any block that as 1 or more nonzeros in it
3141 
3142    The block AIJ format is fully compatible with standard Fortran 77
3143    storage.  That is, the stored row and column indices can begin at
3144    either one (as in Fortran) or zero.  See the users' manual for details.
3145 
3146    Specify the preallocated storage with either nz or nnz (not both).
3147    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3148    allocation.  For additional details, see the users manual chapter on
3149    matrices.
3150 
3151 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ()
3152 @*/
3153 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3154 {
3155   PetscErrorCode ierr;
3156 
3157   PetscFunctionBegin;
3158   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3159   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3160   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3161   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3162   PetscFunctionReturn(0);
3163 }
3164 
3165 #undef __FUNCT__
3166 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3167 /*@C
3168    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3169    per row in the matrix. For good matrix assembly performance the
3170    user should preallocate the matrix storage by setting the parameter nz
3171    (or the array nnz).  By setting these parameters accurately, performance
3172    during matrix assembly can be increased by more than a factor of 50.
3173 
3174    Collective on MPI_Comm
3175 
3176    Input Parameters:
3177 +  A - the matrix
3178 .  bs - size of block
3179 .  nz - number of block nonzeros per block row (same for all rows)
3180 -  nnz - array containing the number of block nonzeros in the various block rows
3181          (possibly different for each block row) or PETSC_NULL
3182 
3183    Options Database Keys:
3184 .   -mat_no_unroll - uses code that does not unroll the loops in the
3185                      block calculations (much slower)
3186 .    -mat_block_size - size of the blocks to use
3187 
3188    Level: intermediate
3189 
3190    Notes:
3191    If the nnz parameter is given then the nz parameter is ignored
3192 
3193    You can call MatGetInfo() to get information on how effective the preallocation was;
3194    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3195    You can also run with the option -info and look for messages with the string
3196    malloc in them to see if additional memory allocation was needed.
3197 
3198    The block AIJ format is fully compatible with standard Fortran 77
3199    storage.  That is, the stored row and column indices can begin at
3200    either one (as in Fortran) or zero.  See the users' manual for details.
3201 
3202    Specify the preallocated storage with either nz or nnz (not both).
3203    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3204    allocation.  For additional details, see the users manual chapter on
3205    matrices.
3206 
3207 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateMPIBAIJ(), MatGetInfo()
3208 @*/
3209 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3210 {
3211   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[]);
3212 
3213   PetscFunctionBegin;
3214   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
3215   if (f) {
3216     ierr = (*f)(B,bs,nz,nnz);CHKERRQ(ierr);
3217   }
3218   PetscFunctionReturn(0);
3219 }
3220 
3221 #undef __FUNCT__
3222 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3223 /*@C
3224    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3225    (the default sequential PETSc format).
3226 
3227    Collective on MPI_Comm
3228 
3229    Input Parameters:
3230 +  A - the matrix
3231 .  i - the indices into j for the start of each local row (starts with zero)
3232 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3233 -  v - optional values in the matrix
3234 
3235    Level: developer
3236 
3237 .keywords: matrix, aij, compressed row, sparse
3238 
3239 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3240 @*/
3241 PetscErrorCode PETSCMAT_DLLEXPORT MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3242 {
3243   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
3244 
3245   PetscFunctionBegin;
3246   ierr = PetscObjectQueryFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
3247   if (f) {
3248     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
3249   }
3250   PetscFunctionReturn(0);
3251 }
3252 
3253 
3254 #undef __FUNCT__
3255 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3256 /*@
3257      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements
3258               (upper triangular entries in CSR format) provided by the user.
3259 
3260      Collective on MPI_Comm
3261 
3262    Input Parameters:
3263 +  comm - must be an MPI communicator of size 1
3264 .  bs - size of block
3265 .  m - number of rows
3266 .  n - number of columns
3267 .  i - row indices
3268 .  j - column indices
3269 -  a - matrix values
3270 
3271    Output Parameter:
3272 .  mat - the matrix
3273 
3274    Level: intermediate
3275 
3276    Notes:
3277        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3278     once the matrix is destroyed
3279 
3280        You cannot set new nonzero locations into this matrix, that will generate an error.
3281 
3282        The i and j indices are 0 based
3283 
3284 .seealso: MatCreate(), MatCreateMPIBAIJ(), MatCreateSeqBAIJ()
3285 
3286 @*/
3287 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3288 {
3289   PetscErrorCode ierr;
3290   PetscInt       ii;
3291   Mat_SeqBAIJ    *baij;
3292 
3293   PetscFunctionBegin;
3294   if (bs != 1) SETERRQ1(PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3295   if (i[0]) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3296 
3297   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3298   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3299   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3300   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3301   baij = (Mat_SeqBAIJ*)(*mat)->data;
3302   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3303 
3304   baij->i = i;
3305   baij->j = j;
3306   baij->a = a;
3307   baij->singlemalloc = PETSC_FALSE;
3308   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3309   baij->free_a       = PETSC_FALSE;
3310   baij->free_ij       = PETSC_FALSE;
3311 
3312   for (ii=0; ii<m; ii++) {
3313     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3314 #if defined(PETSC_USE_DEBUG)
3315     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3316 #endif
3317   }
3318 #if defined(PETSC_USE_DEBUG)
3319   for (ii=0; ii<baij->i[m]; ii++) {
3320     if (j[ii] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3321     if (j[ii] > n - 1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3322   }
3323 #endif
3324 
3325   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3327   PetscFunctionReturn(0);
3328 }
3329