xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 2205254efee3a00a594e5e2a3a70f74dcb40bc03)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52         ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53         diag    += 4;
54         mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66         ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67         diag    += 9;
68         mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76         ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77         diag  += 16;
78         mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86         ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87         diag  += 25;
88         mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97         diag  += 36;
98         mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107         diag  += 49;
108         mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118         diag  += bs2;
119         mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
211   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
212   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
213   PetscFunctionReturn(0);
214 }
215 
216 #undef __FUNCT__
217 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
218 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
219 {
220   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
221   PetscScalar        *x,x1,x2,s1,s2;
222   const PetscScalar  *b;
223   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
224   PetscErrorCode     ierr;
225   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
226   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
227 
228   PetscFunctionBegin;
229   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
230   its = its*lits;
231   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
232   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
233   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
234   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
235   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
236 
237   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
238 
239   diag  = a->diag;
240   idiag = a->idiag;
241   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
242   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
243 
244   if (flag & SOR_ZERO_INITIAL_GUESS) {
245     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
246       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
247       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
248       i2     = 2;
249       idiag += 4;
250       for (i=1; i<m; i++) {
251         v     = aa + 4*ai[i];
252         vi    = aj + ai[i];
253         nz    = diag[i] - ai[i];
254         s1    = b[i2]; s2 = b[i2+1];
255         for (j=0; j<nz; j++) {
256           idx  = 2*vi[j];
257           it   = 4*j;
258           x1   = x[idx]; x2 = x[1+idx];
259           s1  -= v[it]*x1 + v[it+2]*x2;
260           s2  -= v[it+1]*x1 + v[it+3]*x2;
261         }
262         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
263         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
264         idiag   += 4;
265         i2      += 2;
266       }
267       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
268       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
269     }
270     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
271         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
272       i2    = 0;
273       mdiag = a->idiag+4*a->mbs;
274       for (i=0; i<m; i++) {
275         x1      = x[i2]; x2 = x[i2+1];
276         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
277         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
278         mdiag  += 4;
279         i2     += 2;
280       }
281       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
282     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
283       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
284     }
285     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
286       idiag   = a->idiag+4*a->mbs - 4;
287       i2      = 2*m - 2;
288       x1      = x[i2]; x2 = x[i2+1];
289       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
290       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
291       idiag -= 4;
292       i2    -= 2;
293       for (i=m-2; i>=0; i--) {
294         v     = aa + 4*(diag[i]+1);
295         vi    = aj + diag[i] + 1;
296         nz    = ai[i+1] - diag[i] - 1;
297         s1    = x[i2]; s2 = x[i2+1];
298         for (j=0; j<nz; j++) {
299           idx  = 2*vi[j];
300           it   = 4*j;
301           x1   = x[idx]; x2 = x[1+idx];
302           s1  -= v[it]*x1 + v[it+2]*x2;
303           s2  -= v[it+1]*x1 + v[it+3]*x2;
304         }
305         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
306         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
307         idiag   -= 4;
308         i2      -= 2;
309       }
310       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
311     }
312   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
313   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
314   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
320 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
321 {
322   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
323   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
324   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
325   const PetscScalar  *b;
326   PetscErrorCode     ierr;
327   PetscInt           m = a->mbs,i,i2,nz,idx;
328   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
329 
330   PetscFunctionBegin;
331   its = its*lits;
332   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
333   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
334   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
335   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
336   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
337   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
338 
339   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
340 
341   diag  = a->diag;
342   idiag = a->idiag;
343   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
344   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
345 
346   if (flag & SOR_ZERO_INITIAL_GUESS) {
347     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
348       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
349       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
350       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
351       i2     = 3;
352       idiag += 9;
353       for (i=1; i<m; i++) {
354         v     = aa + 9*ai[i];
355         vi    = aj + ai[i];
356         nz    = diag[i] - ai[i];
357         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
358         while (nz--) {
359           idx  = 3*(*vi++);
360           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
361           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
362           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
363           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
364           v   += 9;
365         }
366         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
367         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
368         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
369         idiag   += 9;
370         i2      += 3;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
374     }
375     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
376         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
377       i2    = 0;
378       mdiag = a->idiag+9*a->mbs;
379       for (i=0; i<m; i++) {
380         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
381         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
382         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
383         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
384         mdiag  += 9;
385         i2     += 3;
386       }
387       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
388     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
389       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
390     }
391     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
392       idiag   = a->idiag+9*a->mbs - 9;
393       i2      = 3*m - 3;
394       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
395       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
396       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
397       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
398       idiag -= 9;
399       i2    -= 3;
400       for (i=m-2; i>=0; i--) {
401         v     = aa + 9*(diag[i]+1);
402         vi    = aj + diag[i] + 1;
403         nz    = ai[i+1] - diag[i] - 1;
404         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
405         while (nz--) {
406           idx  = 3*(*vi++);
407           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
408           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
409           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
410           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
411           v   += 9;
412         }
413         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
414         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
415         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
416         idiag   -= 9;
417         i2      -= 3;
418       }
419       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
420     }
421   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
422   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
423   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
429 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
430 {
431   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
432   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
433   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
434   const PetscScalar  *b;
435   PetscErrorCode     ierr;
436   PetscInt           m = a->mbs,i,i2,nz,idx;
437   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
438 
439   PetscFunctionBegin;
440   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
441   its = its*lits;
442   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
443   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
444   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
445   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
446   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
447 
448   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
449 
450   diag  = a->diag;
451   idiag = a->idiag;
452   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
453   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
454 
455   if (flag & SOR_ZERO_INITIAL_GUESS) {
456     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
457       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
458       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
459       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
460       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
461       i2     = 4;
462       idiag += 16;
463       for (i=1; i<m; i++) {
464         v     = aa + 16*ai[i];
465         vi    = aj + ai[i];
466         nz    = diag[i] - ai[i];
467         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
468         while (nz--) {
469           idx  = 4*(*vi++);
470           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
471           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
472           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
473           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
474           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
475           v   += 16;
476         }
477         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
478         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
479         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
480         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
481         idiag   += 16;
482         i2      += 4;
483       }
484       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
485       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
486     }
487     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
488         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
489       i2    = 0;
490       mdiag = a->idiag+16*a->mbs;
491       for (i=0; i<m; i++) {
492         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
493         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
494         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
495         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
496         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
497         mdiag  += 16;
498         i2     += 4;
499       }
500       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
501     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
502       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
503     }
504     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
505       idiag   = a->idiag+16*a->mbs - 16;
506       i2      = 4*m - 4;
507       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
508       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
509       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
510       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
511       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
512       idiag -= 16;
513       i2    -= 4;
514       for (i=m-2; i>=0; i--) {
515         v     = aa + 16*(diag[i]+1);
516         vi    = aj + diag[i] + 1;
517         nz    = ai[i+1] - diag[i] - 1;
518         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
519         while (nz--) {
520           idx  = 4*(*vi++);
521           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
522           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
523           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
524           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
525           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
526           v   += 16;
527         }
528         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
529         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
530         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
531         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
532         idiag   -= 16;
533         i2      -= 4;
534       }
535       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
536     }
537   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
538   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
539   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
540   PetscFunctionReturn(0);
541 }
542 
543 #undef __FUNCT__
544 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
545 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
546 {
547   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
548   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
549   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
550   const PetscScalar  *b;
551   PetscErrorCode     ierr;
552   PetscInt           m = a->mbs,i,i2,nz,idx;
553   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
554 
555   PetscFunctionBegin;
556   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
557   its = its*lits;
558   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
559   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
560   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
561   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
562   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
563 
564   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
565 
566   diag  = a->diag;
567   idiag = a->idiag;
568   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
569   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
570 
571   if (flag & SOR_ZERO_INITIAL_GUESS) {
572     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
573       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
574       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
575       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
576       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
577       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
578       i2     = 5;
579       idiag += 25;
580       for (i=1; i<m; i++) {
581         v     = aa + 25*ai[i];
582         vi    = aj + ai[i];
583         nz    = diag[i] - ai[i];
584         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
585         while (nz--) {
586           idx  = 5*(*vi++);
587           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
588           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
589           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
590           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
591           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
592           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
593           v   += 25;
594         }
595         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
596         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
597         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
598         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
599         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
600         idiag   += 25;
601         i2      += 5;
602       }
603       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
604       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
605     }
606     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
607         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
608       i2    = 0;
609       mdiag = a->idiag+25*a->mbs;
610       for (i=0; i<m; i++) {
611         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
612         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
613         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
614         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
615         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
616         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
617         mdiag  += 25;
618         i2     += 5;
619       }
620       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
621     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
622       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
623     }
624     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
625       idiag   = a->idiag+25*a->mbs - 25;
626       i2      = 5*m - 5;
627       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
628       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
629       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
630       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
631       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
632       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
633       idiag -= 25;
634       i2    -= 5;
635       for (i=m-2; i>=0; i--) {
636         v     = aa + 25*(diag[i]+1);
637         vi    = aj + diag[i] + 1;
638         nz    = ai[i+1] - diag[i] - 1;
639         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
640         while (nz--) {
641           idx  = 5*(*vi++);
642           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
643           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
644           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
645           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
646           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
647           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
648           v   += 25;
649         }
650         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
651         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
652         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
653         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
654         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
655         idiag   -= 25;
656         i2      -= 5;
657       }
658       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
659     }
660   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
661   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
668 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
669 {
670   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
671   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
672   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
673   const PetscScalar  *b;
674   PetscErrorCode     ierr;
675   PetscInt           m = a->mbs,i,i2,nz,idx;
676   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
677 
678   PetscFunctionBegin;
679   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
680   its = its*lits;
681   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
682   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
683   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
684   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
685   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
686 
687   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
688 
689   diag  = a->diag;
690   idiag = a->idiag;
691   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
692   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
693 
694   if (flag & SOR_ZERO_INITIAL_GUESS) {
695     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
696       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
697       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
698       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
699       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
700       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
701       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
702       i2     = 6;
703       idiag += 36;
704       for (i=1; i<m; i++) {
705         v     = aa + 36*ai[i];
706         vi    = aj + ai[i];
707         nz    = diag[i] - ai[i];
708         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
709         while (nz--) {
710           idx  = 6*(*vi++);
711           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
712           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
713           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
714           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
715           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
716           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
717           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
718           v   += 36;
719         }
720         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
721         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
722         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
723         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
724         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
725         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
726         idiag   += 36;
727         i2      += 6;
728       }
729       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
730       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
731     }
732     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
733         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
734       i2    = 0;
735       mdiag = a->idiag+36*a->mbs;
736       for (i=0; i<m; i++) {
737         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
738         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
739         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
740         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
741         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
742         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
743         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
744         mdiag  += 36;
745         i2     += 6;
746       }
747       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
748     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
749       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
750     }
751     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
752       idiag   = a->idiag+36*a->mbs - 36;
753       i2      = 6*m - 6;
754       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
755       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
756       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
757       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
758       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
759       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
760       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
761       idiag -= 36;
762       i2    -= 6;
763       for (i=m-2; i>=0; i--) {
764         v     = aa + 36*(diag[i]+1);
765         vi    = aj + diag[i] + 1;
766         nz    = ai[i+1] - diag[i] - 1;
767         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
768         while (nz--) {
769           idx  = 6*(*vi++);
770           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
771           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
772           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
773           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
774           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
775           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
776           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
777           v   += 36;
778         }
779         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
780         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
781         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
782         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
783         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
784         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
785         idiag   -= 36;
786         i2      -= 6;
787       }
788       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
789     }
790   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
791   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
792   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
793   PetscFunctionReturn(0);
794 }
795 
796 #undef __FUNCT__
797 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
798 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
799 {
800   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
801   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
802   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
803   const PetscScalar  *b;
804   PetscErrorCode     ierr;
805   PetscInt           m = a->mbs,i,i2,nz,idx;
806   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
807 
808   PetscFunctionBegin;
809   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
810   its = its*lits;
811   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
812   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
813   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
814   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
815   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
816 
817   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
818 
819   diag  = a->diag;
820   idiag = a->idiag;
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
823 
824   if (flag & SOR_ZERO_INITIAL_GUESS) {
825     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
826       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
827       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
828       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
829       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
830       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
831       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
832       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
833       i2     = 7;
834       idiag += 49;
835       for (i=1; i<m; i++) {
836         v     = aa + 49*ai[i];
837         vi    = aj + ai[i];
838         nz    = diag[i] - ai[i];
839         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
840         while (nz--) {
841           idx  = 7*(*vi++);
842           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
843           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
844           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
845           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
846           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
847           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
848           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
849           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
850           v   += 49;
851         }
852         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
853         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
854         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
855         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
856         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
857         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
858         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
859         idiag   += 49;
860         i2      += 7;
861       }
862       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
863       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
864     }
865     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
866         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
867       i2    = 0;
868       mdiag = a->idiag+49*a->mbs;
869       for (i=0; i<m; i++) {
870         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
871         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
872         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
873         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
874         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
875         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
876         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
877         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
878         mdiag  += 49;
879         i2     += 7;
880       }
881       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
882     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
883       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
884     }
885     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
886       idiag   = a->idiag+49*a->mbs - 49;
887       i2      = 7*m - 7;
888       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
889       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
890       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
891       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
892       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
893       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
894       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
895       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
896       idiag -= 49;
897       i2    -= 7;
898       for (i=m-2; i>=0; i--) {
899         v     = aa + 49*(diag[i]+1);
900         vi    = aj + diag[i] + 1;
901         nz    = ai[i+1] - diag[i] - 1;
902         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
903         while (nz--) {
904           idx  = 7*(*vi++);
905           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
906           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
907           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
908           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
909           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
910           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
911           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
912           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
913           v   += 49;
914         }
915         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
916         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
917         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
918         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
919         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
920         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
921         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
922         idiag   -= 49;
923         i2      -= 7;
924       }
925       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
926     }
927   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
928   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
929   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
935 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
936 {
937   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
938   PetscScalar        *x,*work,*w,*workt;
939   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
940   const PetscScalar  *b;
941   PetscErrorCode     ierr;
942   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
943   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
944 
945   PetscFunctionBegin;
946   its = its*lits;
947   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
948   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
949   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
950   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
951   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
952   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
953 
954   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
955 
956   diag  = a->diag;
957   idiag = a->idiag;
958   if (!a->mult_work) {
959     k    = PetscMax(A->rmap->n,A->cmap->n);
960     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
961   }
962   work = a->mult_work;
963   if (!a->sor_work) {
964     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
965   }
966   w = a->sor_work;
967 
968   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
969   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
970 
971   if (flag & SOR_ZERO_INITIAL_GUESS) {
972     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
973       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
974       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
975       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
976       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
977       i2     = bs;
978       idiag += bs2;
979       for (i=1; i<m; i++) {
980         v     = aa + bs2*ai[i];
981         vi    = aj + ai[i];
982         nz    = diag[i] - ai[i];
983 
984         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
985         /* copy all rows of x that are needed into contiguous space */
986         workt = work;
987         for (j=0; j<nz; j++) {
988           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
989           workt += bs;
990         }
991         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
992        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
993         while (nz--) {
994           idx  = N*(*vi++);
995           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
996           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
997           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
998           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
999           v   += N2;
1000           } */
1001 
1002         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1003         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1004         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1005         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1006 
1007         idiag   += bs2;
1008         i2      += bs;
1009       }
1010       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1011       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1012     }
1013     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1014         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1015       i2    = 0;
1016       mdiag = a->idiag+bs2*a->mbs;
1017       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1018       for (i=0; i<m; i++) {
1019         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1020         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1021         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1022         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1023         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1024 
1025         mdiag  += bs2;
1026         i2     += bs;
1027       }
1028       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1029     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1030       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1031     }
1032     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1033       idiag   = a->idiag+bs2*a->mbs - bs2;
1034       i2      = bs*m - bs;
1035       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1036       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1037       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1038       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1039       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1040       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1041       idiag -= bs2;
1042       i2    -= bs;
1043       for (i=m-2; i>=0; i--) {
1044         v     = aa + bs2*(diag[i]+1);
1045         vi    = aj + diag[i] + 1;
1046         nz    = ai[i+1] - diag[i] - 1;
1047 
1048         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049         /* copy all rows of x that are needed into contiguous space */
1050         workt = work;
1051         for (j=0; j<nz; j++) {
1052           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1053           workt += bs;
1054         }
1055         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1056         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1057         while (nz--) {
1058           idx  = N*(*vi++);
1059           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1060           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1061           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1062           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1063           v   += N2;
1064           } */
1065 
1066         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1067         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1068         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1069         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1070         idiag   -= bs2;
1071         i2      -= bs;
1072       }
1073       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1074     }
1075   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1076   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1077   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 /*
1082     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1083 */
1084 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1085 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1086 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1087 #define matsetvaluesblocked4_ matsetvaluesblocked4
1088 #endif
1089 
1090 EXTERN_C_BEGIN
1091 #undef __FUNCT__
1092 #define __FUNCT__ "matsetvaluesblocked4_"
1093 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1094 {
1095   Mat               A = *AA;
1096   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1097   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1098   PetscInt          *ai=a->i,*ailen=a->ilen;
1099   PetscInt          *aj=a->j,stepval,lastcol = -1;
1100   const PetscScalar *value = v;
1101   MatScalar         *ap,*aa = a->a,*bap;
1102 
1103   PetscFunctionBegin;
1104   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1105   stepval = (n-1)*4;
1106   for (k=0; k<m; k++) { /* loop over added rows */
1107     row  = im[k];
1108     rp   = aj + ai[row];
1109     ap   = aa + 16*ai[row];
1110     nrow = ailen[row];
1111     low  = 0;
1112     high = nrow;
1113     for (l=0; l<n; l++) { /* loop over added columns */
1114       col = in[l];
1115       if (col <= lastcol)  low = 0;
1116       else                high = nrow;
1117       lastcol = col;
1118       value = v + k*(stepval+4 + l)*4;
1119       while (high-low > 7) {
1120         t = (low+high)/2;
1121         if (rp[t] > col) high = t;
1122         else             low  = t;
1123       }
1124       for (i=low; i<high; i++) {
1125         if (rp[i] > col) break;
1126         if (rp[i] == col) {
1127           bap  = ap +  16*i;
1128           for (ii=0; ii<4; ii++,value+=stepval) {
1129             for (jj=ii; jj<16; jj+=4) {
1130               bap[jj] += *value++;
1131             }
1132           }
1133           goto noinsert2;
1134         }
1135       }
1136       N = nrow++ - 1;
1137       high++; /* added new column index thus must search to one higher than before */
1138       /* shift up all the later entries in this row */
1139       for (ii=N; ii>=i; ii--) {
1140         rp[ii+1] = rp[ii];
1141         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1142       }
1143       if (N >= i) {
1144         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1145       }
1146       rp[i] = col;
1147       bap   = ap +  16*i;
1148       for (ii=0; ii<4; ii++,value+=stepval) {
1149         for (jj=ii; jj<16; jj+=4) {
1150           bap[jj] = *value++;
1151         }
1152       }
1153       noinsert2:;
1154       low = i;
1155     }
1156     ailen[row] = nrow;
1157   }
1158   PetscFunctionReturnVoid();
1159 }
1160 EXTERN_C_END
1161 
1162 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1163 #define matsetvalues4_ MATSETVALUES4
1164 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1165 #define matsetvalues4_ matsetvalues4
1166 #endif
1167 
1168 EXTERN_C_BEGIN
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatSetValues4_"
1171 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1172 {
1173   Mat         A = *AA;
1174   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1175   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1176   PetscInt    *ai=a->i,*ailen=a->ilen;
1177   PetscInt    *aj=a->j,brow,bcol;
1178   PetscInt    ridx,cidx,lastcol = -1;
1179   MatScalar   *ap,value,*aa=a->a,*bap;
1180 
1181   PetscFunctionBegin;
1182   for (k=0; k<m; k++) { /* loop over added rows */
1183     row  = im[k]; brow = row/4;
1184     rp   = aj + ai[brow];
1185     ap   = aa + 16*ai[brow];
1186     nrow = ailen[brow];
1187     low  = 0;
1188     high = nrow;
1189     for (l=0; l<n; l++) { /* loop over added columns */
1190       col = in[l]; bcol = col/4;
1191       ridx = row % 4; cidx = col % 4;
1192       value = v[l + k*n];
1193       if (col <= lastcol)  low = 0;
1194       else                high = nrow;
1195       lastcol = col;
1196       while (high-low > 7) {
1197         t = (low+high)/2;
1198         if (rp[t] > bcol) high = t;
1199         else              low  = t;
1200       }
1201       for (i=low; i<high; i++) {
1202         if (rp[i] > bcol) break;
1203         if (rp[i] == bcol) {
1204           bap  = ap +  16*i + 4*cidx + ridx;
1205           *bap += value;
1206           goto noinsert1;
1207         }
1208       }
1209       N = nrow++ - 1;
1210       high++; /* added new column thus must search to one higher than before */
1211       /* shift up all the later entries in this row */
1212       for (ii=N; ii>=i; ii--) {
1213         rp[ii+1] = rp[ii];
1214         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1215       }
1216       if (N>=i) {
1217         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1218       }
1219       rp[i]                    = bcol;
1220       ap[16*i + 4*cidx + ridx] = value;
1221       noinsert1:;
1222       low = i;
1223     }
1224     ailen[brow] = nrow;
1225   }
1226   PetscFunctionReturnVoid();
1227 }
1228 EXTERN_C_END
1229 
1230 /*
1231      Checks for missing diagonals
1232 */
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1235 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1236 {
1237   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1238   PetscErrorCode ierr;
1239   PetscInt       *diag,*jj = a->j,i;
1240 
1241   PetscFunctionBegin;
1242   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1243   *missing = PETSC_FALSE;
1244   if (A->rmap->n > 0 && !jj) {
1245     *missing  = PETSC_TRUE;
1246     if (d) *d = 0;
1247     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1248   } else {
1249     diag     = a->diag;
1250     for (i=0; i<a->mbs; i++) {
1251       if (jj[diag[i]] != i) {
1252         *missing  = PETSC_TRUE;
1253         if (d) *d = i;
1254         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1255         break;
1256       }
1257     }
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1264 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1265 {
1266   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1267   PetscErrorCode ierr;
1268   PetscInt       i,j,m = a->mbs;
1269 
1270   PetscFunctionBegin;
1271   if (!a->diag) {
1272     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1273     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1274     a->free_diag = PETSC_TRUE;
1275   }
1276   for (i=0; i<m; i++) {
1277     a->diag[i] = a->i[i+1];
1278     for (j=a->i[i]; j<a->i[i+1]; j++) {
1279       if (a->j[j] == i) {
1280         a->diag[i] = j;
1281         break;
1282       }
1283     }
1284   }
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 
1289 #undef __FUNCT__
1290 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1291 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1292 {
1293   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1294   PetscErrorCode ierr;
1295   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1296   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1297 
1298   PetscFunctionBegin;
1299   *nn = n;
1300   if (!ia) PetscFunctionReturn(0);
1301   if (symmetric) {
1302     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1303     nz   = tia[n];
1304   } else {
1305     tia = a->i; tja = a->j;
1306   }
1307 
1308   if (!blockcompressed && bs > 1) {
1309     (*nn) *= bs;
1310     /* malloc & create the natural set of indices */
1311     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1312     if (n) {
1313       (*ia)[0] = 0;
1314       for (j=1; j<bs; j++) {
1315         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1316       }
1317     }
1318 
1319     for (i=1; i<n; i++) {
1320       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1321       for (j=1; j<bs; j++) {
1322         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1323       }
1324     }
1325     if (n) {
1326       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1327     }
1328 
1329     if (inja) {
1330       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1331       cnt = 0;
1332       for (i=0; i<n; i++) {
1333         for (j=0; j<bs; j++) {
1334           for (k=tia[i]; k<tia[i+1]; k++) {
1335             for (l=0; l<bs; l++) {
1336               (*ja)[cnt++] = bs*tja[k] + l;
1337             }
1338           }
1339         }
1340       }
1341     }
1342 
1343     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1344       ierr = PetscFree(tia);CHKERRQ(ierr);
1345       ierr = PetscFree(tja);CHKERRQ(ierr);
1346     }
1347   } else if (oshift == 1) {
1348     if (symmetric) {
1349       nz = tia[A->rmap->n/bs];
1350       /*  add 1 to i and j indices */
1351       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1352       *ia = tia;
1353       if (ja) {
1354         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1355         *ja = tja;
1356       }
1357     } else {
1358       nz = a->i[A->rmap->n/bs];
1359       /* malloc space and  add 1 to i and j indices */
1360       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1361       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1362       if (ja) {
1363         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1364         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1365       }
1366     }
1367   } else {
1368     *ia = tia;
1369     if (ja) *ja = tja;
1370   }
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1376 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1377 {
1378   PetscErrorCode ierr;
1379 
1380   PetscFunctionBegin;
1381   if (!ia) PetscFunctionReturn(0);
1382   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1383     ierr = PetscFree(*ia);CHKERRQ(ierr);
1384     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1391 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1392 {
1393   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397 #if defined(PETSC_USE_LOG)
1398   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1399 #endif
1400   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1401   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1402   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1403   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1404   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1405   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1406   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1407   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1408   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1409   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1410   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1411   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1412   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1413 
1414   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1415   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1416   ierr = PetscFree(A->data);CHKERRQ(ierr);
1417 
1418   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1419   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1420   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1421   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1422   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1423   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1424   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1425   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1426   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1427   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1428   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1434 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1435 {
1436   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1437   PetscErrorCode ierr;
1438 
1439   PetscFunctionBegin;
1440   switch (op) {
1441   case MAT_ROW_ORIENTED:
1442     a->roworiented    = flg;
1443     break;
1444   case MAT_KEEP_NONZERO_PATTERN:
1445     a->keepnonzeropattern = flg;
1446     break;
1447   case MAT_NEW_NONZERO_LOCATIONS:
1448     a->nonew          = (flg ? 0 : 1);
1449     break;
1450   case MAT_NEW_NONZERO_LOCATION_ERR:
1451     a->nonew          = (flg ? -1 : 0);
1452     break;
1453   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1454     a->nonew          = (flg ? -2 : 0);
1455     break;
1456   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1457     a->nounused       = (flg ? -1 : 0);
1458     break;
1459   case MAT_CHECK_COMPRESSED_ROW:
1460     a->compressedrow.check = flg;
1461     break;
1462   case MAT_NEW_DIAGONALS:
1463   case MAT_IGNORE_OFF_PROC_ENTRIES:
1464   case MAT_USE_HASH_TABLE:
1465     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1466     break;
1467   case MAT_SPD:
1468   case MAT_SYMMETRIC:
1469   case MAT_STRUCTURALLY_SYMMETRIC:
1470   case MAT_HERMITIAN:
1471   case MAT_SYMMETRY_ETERNAL:
1472     /* These options are handled directly by MatSetOption() */
1473     break;
1474   default:
1475     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 #undef __FUNCT__
1481 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1482 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1483 {
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscErrorCode ierr;
1486   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1487   MatScalar      *aa,*aa_i;
1488   PetscScalar    *v_i;
1489 
1490   PetscFunctionBegin;
1491   bs  = A->rmap->bs;
1492   ai  = a->i;
1493   aj  = a->j;
1494   aa  = a->a;
1495   bs2 = a->bs2;
1496 
1497   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1498 
1499   bn  = row/bs;   /* Block number */
1500   bp  = row % bs; /* Block Position */
1501   M   = ai[bn+1] - ai[bn];
1502   *nz = bs*M;
1503 
1504   if (v) {
1505     *v = 0;
1506     if (*nz) {
1507       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1508       for (i=0; i<M; i++) { /* for each block in the block row */
1509         v_i  = *v + i*bs;
1510         aa_i = aa + bs2*(ai[bn] + i);
1511         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1512       }
1513     }
1514   }
1515 
1516   if (idx) {
1517     *idx = 0;
1518     if (*nz) {
1519       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1520       for (i=0; i<M; i++) { /* for each block in the block row */
1521         idx_i = *idx + i*bs;
1522         itmp  = bs*aj[ai[bn] + i];
1523         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1524       }
1525     }
1526   }
1527   PetscFunctionReturn(0);
1528 }
1529 
1530 #undef __FUNCT__
1531 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1532 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1533 {
1534   PetscErrorCode ierr;
1535 
1536   PetscFunctionBegin;
1537   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1538   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1539   PetscFunctionReturn(0);
1540 }
1541 
1542 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1543 
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1546 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1547 {
1548   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1549   Mat            C;
1550   PetscErrorCode ierr;
1551   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1552   PetscInt       *rows,*cols,bs2=a->bs2;
1553   MatScalar      *array;
1554 
1555   PetscFunctionBegin;
1556   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1557   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1558     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1559     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1560 
1561     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1562     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1563     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1564     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1565     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1566     ierr = PetscFree(col);CHKERRQ(ierr);
1567   } else {
1568     C = *B;
1569   }
1570 
1571   array = a->a;
1572   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1573   for (i=0; i<mbs; i++) {
1574     cols[0] = i*bs;
1575     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1576     len = ai[i+1] - ai[i];
1577     for (j=0; j<len; j++) {
1578       rows[0] = (*aj++)*bs;
1579       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1580       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1581       array += bs2;
1582     }
1583   }
1584   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1585 
1586   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1587   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1588 
1589   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1590     *B = C;
1591   } else {
1592     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1593   }
1594   PetscFunctionReturn(0);
1595 }
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1600 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1601 {
1602   PetscErrorCode ierr;
1603   Mat            Btrans;
1604 
1605   PetscFunctionBegin;
1606   *f = PETSC_FALSE;
1607   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1608   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1609   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1610   PetscFunctionReturn(0);
1611 }
1612 EXTERN_C_END
1613 
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1616 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1617 {
1618   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1619   PetscErrorCode ierr;
1620   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1621   int            fd;
1622   PetscScalar    *aa;
1623   FILE           *file;
1624 
1625   PetscFunctionBegin;
1626   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1627   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1628   col_lens[0] = MAT_FILE_CLASSID;
1629 
1630   col_lens[1] = A->rmap->N;
1631   col_lens[2] = A->cmap->n;
1632   col_lens[3] = a->nz*bs2;
1633 
1634   /* store lengths of each row and write (including header) to file */
1635   count = 0;
1636   for (i=0; i<a->mbs; i++) {
1637     for (j=0; j<bs; j++) {
1638       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1639     }
1640   }
1641   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1642   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1643 
1644   /* store column indices (zero start index) */
1645   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1646   count = 0;
1647   for (i=0; i<a->mbs; i++) {
1648     for (j=0; j<bs; j++) {
1649       for (k=a->i[i]; k<a->i[i+1]; k++) {
1650         for (l=0; l<bs; l++) {
1651           jj[count++] = bs*a->j[k] + l;
1652         }
1653       }
1654     }
1655   }
1656   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1657   ierr = PetscFree(jj);CHKERRQ(ierr);
1658 
1659   /* store nonzero values */
1660   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1661   count = 0;
1662   for (i=0; i<a->mbs; i++) {
1663     for (j=0; j<bs; j++) {
1664       for (k=a->i[i]; k<a->i[i+1]; k++) {
1665         for (l=0; l<bs; l++) {
1666           aa[count++] = a->a[bs2*k + l*bs + j];
1667         }
1668       }
1669     }
1670   }
1671   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1672   ierr = PetscFree(aa);CHKERRQ(ierr);
1673 
1674   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1675   if (file) {
1676     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1677   }
1678   PetscFunctionReturn(0);
1679 }
1680 
1681 #undef __FUNCT__
1682 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1683 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1684 {
1685   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1686   PetscErrorCode    ierr;
1687   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1688   PetscViewerFormat format;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1692   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1693     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1694   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1695     Mat aij;
1696     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1697     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1698     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1699   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1700      PetscFunctionReturn(0);
1701   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1702     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1703     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1704     for (i=0; i<a->mbs; i++) {
1705       for (j=0; j<bs; j++) {
1706         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1707         for (k=a->i[i]; k<a->i[i+1]; k++) {
1708           for (l=0; l<bs; l++) {
1709 #if defined(PETSC_USE_COMPLEX)
1710             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1711               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1712                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1713             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1714               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1715                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1716             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1717               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1718             }
1719 #else
1720             if (a->a[bs2*k + l*bs + j] != 0.0) {
1721               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1722             }
1723 #endif
1724           }
1725         }
1726         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1727       }
1728     }
1729     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1730   } else {
1731     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1732     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1733     for (i=0; i<a->mbs; i++) {
1734       for (j=0; j<bs; j++) {
1735         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1736         for (k=a->i[i]; k<a->i[i+1]; k++) {
1737           for (l=0; l<bs; l++) {
1738 #if defined(PETSC_USE_COMPLEX)
1739             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1740               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1741                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1742             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1743               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1744                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1745             } else {
1746               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1747             }
1748 #else
1749             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1750 #endif
1751           }
1752         }
1753         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1754       }
1755     }
1756     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1757   }
1758   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1759   PetscFunctionReturn(0);
1760 }
1761 
1762 #undef __FUNCT__
1763 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1764 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1765 {
1766   Mat            A = (Mat) Aa;
1767   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1768   PetscErrorCode ierr;
1769   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1770   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1771   MatScalar      *aa;
1772   PetscViewer    viewer;
1773   PetscViewerFormat format;
1774 
1775   PetscFunctionBegin;
1776   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1777   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1778 
1779   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1780 
1781   /* loop over matrix elements drawing boxes */
1782 
1783   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1784     color = PETSC_DRAW_BLUE;
1785     for (i=0,row=0; i<mbs; i++,row+=bs) {
1786       for (j=a->i[i]; j<a->i[i+1]; j++) {
1787         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1788         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1789         aa = a->a + j*bs2;
1790         for (k=0; k<bs; k++) {
1791           for (l=0; l<bs; l++) {
1792             if (PetscRealPart(*aa++) >=  0.) continue;
1793             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1794           }
1795         }
1796       }
1797     }
1798     color = PETSC_DRAW_CYAN;
1799     for (i=0,row=0; i<mbs; i++,row+=bs) {
1800       for (j=a->i[i]; j<a->i[i+1]; j++) {
1801         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1802         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1803         aa = a->a + j*bs2;
1804         for (k=0; k<bs; k++) {
1805           for (l=0; l<bs; l++) {
1806             if (PetscRealPart(*aa++) != 0.) continue;
1807             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1808           }
1809         }
1810       }
1811     }
1812     color = PETSC_DRAW_RED;
1813     for (i=0,row=0; i<mbs; i++,row+=bs) {
1814       for (j=a->i[i]; j<a->i[i+1]; j++) {
1815         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1816         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1817         aa = a->a + j*bs2;
1818         for (k=0; k<bs; k++) {
1819           for (l=0; l<bs; l++) {
1820             if (PetscRealPart(*aa++) <= 0.) continue;
1821             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1822           }
1823         }
1824       }
1825     }
1826   } else {
1827     /* use contour shading to indicate magnitude of values */
1828     /* first determine max of all nonzero values */
1829     PetscDraw   popup;
1830     PetscReal scale,maxv = 0.0;
1831 
1832     for (i=0; i<a->nz*a->bs2; i++) {
1833       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1834     }
1835     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1836     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1837     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1838     for (i=0,row=0; i<mbs; i++,row+=bs) {
1839       for (j=a->i[i]; j<a->i[i+1]; j++) {
1840         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1841         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1842         aa = a->a + j*bs2;
1843         for (k=0; k<bs; k++) {
1844           for (l=0; l<bs; l++) {
1845             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1846             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1847           }
1848         }
1849       }
1850     }
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1857 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1858 {
1859   PetscErrorCode ierr;
1860   PetscReal      xl,yl,xr,yr,w,h;
1861   PetscDraw      draw;
1862   PetscBool      isnull;
1863 
1864   PetscFunctionBegin;
1865   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1866   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1867 
1868   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1869   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1870   xr += w;    yr += h;  xl = -w;     yl = -h;
1871   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1872   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1873   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatView_SeqBAIJ"
1879 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1880 {
1881   PetscErrorCode ierr;
1882   PetscBool      iascii,isbinary,isdraw;
1883 
1884   PetscFunctionBegin;
1885   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1886   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1887   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1888   if (iascii) {
1889     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1890   } else if (isbinary) {
1891     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1892   } else if (isdraw) {
1893     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1894   } else {
1895     Mat B;
1896     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1897     ierr = MatView(B,viewer);CHKERRQ(ierr);
1898     ierr = MatDestroy(&B);CHKERRQ(ierr);
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1906 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1907 {
1908   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1909   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1910   PetscInt    *ai = a->i,*ailen = a->ilen;
1911   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1912   MatScalar   *ap,*aa = a->a;
1913 
1914   PetscFunctionBegin;
1915   for (k=0; k<m; k++) { /* loop over rows */
1916     row  = im[k]; brow = row/bs;
1917     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1918     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1919     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1920     nrow = ailen[brow];
1921     for (l=0; l<n; l++) { /* loop over columns */
1922       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1923       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1924       col  = in[l] ;
1925       bcol = col/bs;
1926       cidx = col%bs;
1927       ridx = row%bs;
1928       high = nrow;
1929       low  = 0; /* assume unsorted */
1930       while (high-low > 5) {
1931         t = (low+high)/2;
1932         if (rp[t] > bcol) high = t;
1933         else             low  = t;
1934       }
1935       for (i=low; i<high; i++) {
1936         if (rp[i] > bcol) break;
1937         if (rp[i] == bcol) {
1938           *v++ = ap[bs2*i+bs*cidx+ridx];
1939           goto finished;
1940         }
1941       }
1942       *v++ = 0.0;
1943       finished:;
1944     }
1945   }
1946   PetscFunctionReturn(0);
1947 }
1948 
1949 #undef __FUNCT__
1950 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1951 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1952 {
1953   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1954   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1955   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1956   PetscErrorCode    ierr;
1957   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1958   PetscBool         roworiented=a->roworiented;
1959   const PetscScalar *value = v;
1960   MatScalar         *ap,*aa = a->a,*bap;
1961 
1962   PetscFunctionBegin;
1963   if (roworiented) {
1964     stepval = (n-1)*bs;
1965   } else {
1966     stepval = (m-1)*bs;
1967   }
1968   for (k=0; k<m; k++) { /* loop over added rows */
1969     row  = im[k];
1970     if (row < 0) continue;
1971 #if defined(PETSC_USE_DEBUG)
1972     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1973 #endif
1974     rp   = aj + ai[row];
1975     ap   = aa + bs2*ai[row];
1976     rmax = imax[row];
1977     nrow = ailen[row];
1978     low  = 0;
1979     high = nrow;
1980     for (l=0; l<n; l++) { /* loop over added columns */
1981       if (in[l] < 0) continue;
1982 #if defined(PETSC_USE_DEBUG)
1983       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1984 #endif
1985       col = in[l];
1986       if (roworiented) {
1987         value = v + (k*(stepval+bs) + l)*bs;
1988       } else {
1989         value = v + (l*(stepval+bs) + k)*bs;
1990       }
1991       if (col <= lastcol) low = 0; else high = nrow;
1992       lastcol = col;
1993       while (high-low > 7) {
1994         t = (low+high)/2;
1995         if (rp[t] > col) high = t;
1996         else             low  = t;
1997       }
1998       for (i=low; i<high; i++) {
1999         if (rp[i] > col) break;
2000         if (rp[i] == col) {
2001           bap  = ap +  bs2*i;
2002           if (roworiented) {
2003             if (is == ADD_VALUES) {
2004               for (ii=0; ii<bs; ii++,value+=stepval) {
2005                 for (jj=ii; jj<bs2; jj+=bs) {
2006                   bap[jj] += *value++;
2007                 }
2008               }
2009             } else {
2010               for (ii=0; ii<bs; ii++,value+=stepval) {
2011                 for (jj=ii; jj<bs2; jj+=bs) {
2012                   bap[jj] = *value++;
2013                 }
2014               }
2015             }
2016           } else {
2017             if (is == ADD_VALUES) {
2018               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2019                 for (jj=0; jj<bs; jj++) {
2020                   bap[jj] += value[jj];
2021                 }
2022                 bap += bs;
2023               }
2024             } else {
2025               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2026                 for (jj=0; jj<bs; jj++) {
2027                   bap[jj]  = value[jj];
2028                 }
2029                 bap += bs;
2030               }
2031             }
2032           }
2033           goto noinsert2;
2034         }
2035       }
2036       if (nonew == 1) goto noinsert2;
2037       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2038       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2039       N = nrow++ - 1; high++;
2040       /* shift up all the later entries in this row */
2041       for (ii=N; ii>=i; ii--) {
2042         rp[ii+1] = rp[ii];
2043         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2044       }
2045       if (N >= i) {
2046         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2047       }
2048       rp[i] = col;
2049       bap   = ap +  bs2*i;
2050       if (roworiented) {
2051         for (ii=0; ii<bs; ii++,value+=stepval) {
2052           for (jj=ii; jj<bs2; jj+=bs) {
2053             bap[jj] = *value++;
2054           }
2055         }
2056       } else {
2057         for (ii=0; ii<bs; ii++,value+=stepval) {
2058           for (jj=0; jj<bs; jj++) {
2059             *bap++  = *value++;
2060           }
2061         }
2062       }
2063       noinsert2:;
2064       low = i;
2065     }
2066     ailen[row] = nrow;
2067   }
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #undef __FUNCT__
2072 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2073 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2074 {
2075   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2076   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2077   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2078   PetscErrorCode ierr;
2079   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2080   MatScalar      *aa = a->a,*ap;
2081   PetscReal      ratio=0.6;
2082 
2083   PetscFunctionBegin;
2084   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2085 
2086   if (m) rmax = ailen[0];
2087   for (i=1; i<mbs; i++) {
2088     /* move each row back by the amount of empty slots (fshift) before it*/
2089     fshift += imax[i-1] - ailen[i-1];
2090     rmax   = PetscMax(rmax,ailen[i]);
2091     if (fshift) {
2092       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2093       N = ailen[i];
2094       for (j=0; j<N; j++) {
2095         ip[j-fshift] = ip[j];
2096         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2097       }
2098     }
2099     ai[i] = ai[i-1] + ailen[i-1];
2100   }
2101   if (mbs) {
2102     fshift += imax[mbs-1] - ailen[mbs-1];
2103     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2104   }
2105   /* reset ilen and imax for each row */
2106   for (i=0; i<mbs; i++) {
2107     ailen[i] = imax[i] = ai[i+1] - ai[i];
2108   }
2109   a->nz = ai[mbs];
2110 
2111   /* diagonals may have moved, so kill the diagonal pointers */
2112   a->idiagvalid = PETSC_FALSE;
2113   if (fshift && a->diag) {
2114     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2115     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2116     a->diag = 0;
2117   }
2118   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2119   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2120   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2121   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2122   A->info.mallocs     += a->reallocs;
2123   a->reallocs          = 0;
2124   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2125 
2126   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2127   A->same_nonzero = PETSC_TRUE;
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 /*
2132    This function returns an array of flags which indicate the locations of contiguous
2133    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2134    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2135    Assume: sizes should be long enough to hold all the values.
2136 */
2137 #undef __FUNCT__
2138 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2139 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2140 {
2141   PetscInt   i,j,k,row;
2142   PetscBool  flg;
2143 
2144   PetscFunctionBegin;
2145   for (i=0,j=0; i<n; j++) {
2146     row = idx[i];
2147     if (row%bs!=0) { /* Not the begining of a block */
2148       sizes[j] = 1;
2149       i++;
2150     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2151       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2152       i++;
2153     } else { /* Begining of the block, so check if the complete block exists */
2154       flg = PETSC_TRUE;
2155       for (k=1; k<bs; k++) {
2156         if (row+k != idx[i+k]) { /* break in the block */
2157           flg = PETSC_FALSE;
2158           break;
2159         }
2160       }
2161       if (flg) { /* No break in the bs */
2162         sizes[j] = bs;
2163         i+= bs;
2164       } else {
2165         sizes[j] = 1;
2166         i++;
2167       }
2168     }
2169   }
2170   *bs_max = j;
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2176 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2177 {
2178   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2179   PetscErrorCode    ierr;
2180   PetscInt          i,j,k,count,*rows;
2181   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2182   PetscScalar       zero = 0.0;
2183   MatScalar         *aa;
2184   const PetscScalar *xx;
2185   PetscScalar       *bb;
2186 
2187   PetscFunctionBegin;
2188   /* fix right hand side if needed */
2189   if (x && b) {
2190     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2191     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2192     for (i=0; i<is_n; i++) {
2193       bb[is_idx[i]] = diag*xx[is_idx[i]];
2194     }
2195     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2196     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2197   }
2198 
2199   /* Make a copy of the IS and  sort it */
2200   /* allocate memory for rows,sizes */
2201   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2202 
2203   /* copy IS values to rows, and sort them */
2204   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2205   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2206 
2207   if (baij->keepnonzeropattern) {
2208     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2209     bs_max = is_n;
2210     A->same_nonzero = PETSC_TRUE;
2211   } else {
2212     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2213     A->same_nonzero = PETSC_FALSE;
2214   }
2215 
2216   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2217     row   = rows[j];
2218     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2219     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2220     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2221     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2222       if (diag != (PetscScalar)0.0) {
2223         if (baij->ilen[row/bs] > 0) {
2224           baij->ilen[row/bs]       = 1;
2225           baij->j[baij->i[row/bs]] = row/bs;
2226           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2227         }
2228         /* Now insert all the diagonal values for this bs */
2229         for (k=0; k<bs; k++) {
2230           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2231         }
2232       } else { /* (diag == 0.0) */
2233         baij->ilen[row/bs] = 0;
2234       } /* end (diag == 0.0) */
2235     } else { /* (sizes[i] != bs) */
2236 #if defined (PETSC_USE_DEBUG)
2237       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2238 #endif
2239       for (k=0; k<count; k++) {
2240         aa[0] =  zero;
2241         aa    += bs;
2242       }
2243       if (diag != (PetscScalar)0.0) {
2244         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2245       }
2246     }
2247   }
2248 
2249   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2250   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2256 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2257 {
2258   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2259   PetscErrorCode    ierr;
2260   PetscInt          i,j,k,count;
2261   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2262   PetscScalar       zero = 0.0;
2263   MatScalar         *aa;
2264   const PetscScalar *xx;
2265   PetscScalar       *bb;
2266   PetscBool         *zeroed,vecs = PETSC_FALSE;
2267 
2268   PetscFunctionBegin;
2269   /* fix right hand side if needed */
2270   if (x && b) {
2271     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2272     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2273     vecs = PETSC_TRUE;
2274   }
2275   A->same_nonzero = PETSC_TRUE;
2276 
2277   /* zero the columns */
2278   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2279   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2280   for (i=0; i<is_n; i++) {
2281     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2282     zeroed[is_idx[i]] = PETSC_TRUE;
2283   }
2284   for (i=0; i<A->rmap->N; i++) {
2285     if (!zeroed[i]) {
2286       row = i/bs;
2287       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2288         for (k=0; k<bs; k++) {
2289           col = bs*baij->j[j] + k;
2290           if (zeroed[col]) {
2291             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2292             if (vecs) bb[i] -= aa[0]*xx[col];
2293             aa[0] = 0.0;
2294           }
2295         }
2296       }
2297     } else if (vecs) bb[i] = diag*xx[i];
2298   }
2299   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2300   if (vecs) {
2301     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2302     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2303   }
2304 
2305   /* zero the rows */
2306   for (i=0; i<is_n; i++) {
2307     row   = is_idx[i];
2308     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2309     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2310     for (k=0; k<count; k++) {
2311       aa[0] =  zero;
2312       aa    += bs;
2313     }
2314     if (diag != (PetscScalar)0.0) {
2315       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2316     }
2317   }
2318   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2319   PetscFunctionReturn(0);
2320 }
2321 
2322 #undef __FUNCT__
2323 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2324 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2325 {
2326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2327   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2328   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2329   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2330   PetscErrorCode ierr;
2331   PetscInt       ridx,cidx,bs2=a->bs2;
2332   PetscBool      roworiented=a->roworiented;
2333   MatScalar      *ap,value,*aa=a->a,*bap;
2334 
2335   PetscFunctionBegin;
2336   if (v) PetscValidScalarPointer(v,6);
2337   for (k=0; k<m; k++) { /* loop over added rows */
2338     row  = im[k];
2339     brow = row/bs;
2340     if (row < 0) continue;
2341 #if defined(PETSC_USE_DEBUG)
2342     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2343 #endif
2344     rp   = aj + ai[brow];
2345     ap   = aa + bs2*ai[brow];
2346     rmax = imax[brow];
2347     nrow = ailen[brow];
2348     low  = 0;
2349     high = nrow;
2350     for (l=0; l<n; l++) { /* loop over added columns */
2351       if (in[l] < 0) continue;
2352 #if defined(PETSC_USE_DEBUG)
2353       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2354 #endif
2355       col = in[l]; bcol = col/bs;
2356       ridx = row % bs; cidx = col % bs;
2357       if (roworiented) {
2358         value = v[l + k*n];
2359       } else {
2360         value = v[k + l*m];
2361       }
2362       if (col <= lastcol) low = 0; else high = nrow;
2363       lastcol = col;
2364       while (high-low > 7) {
2365         t = (low+high)/2;
2366         if (rp[t] > bcol) high = t;
2367         else              low  = t;
2368       }
2369       for (i=low; i<high; i++) {
2370         if (rp[i] > bcol) break;
2371         if (rp[i] == bcol) {
2372           bap  = ap +  bs2*i + bs*cidx + ridx;
2373           if (is == ADD_VALUES) *bap += value;
2374           else                  *bap  = value;
2375           goto noinsert1;
2376         }
2377       }
2378       if (nonew == 1) goto noinsert1;
2379       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2380       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2381       N = nrow++ - 1; high++;
2382       /* shift up all the later entries in this row */
2383       for (ii=N; ii>=i; ii--) {
2384         rp[ii+1] = rp[ii];
2385         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2386       }
2387       if (N>=i) {
2388         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2389       }
2390       rp[i]                      = bcol;
2391       ap[bs2*i + bs*cidx + ridx] = value;
2392       a->nz++;
2393       noinsert1:;
2394       low = i;
2395     }
2396     ailen[brow] = nrow;
2397   }
2398   A->same_nonzero = PETSC_FALSE;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2404 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2405 {
2406   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2407   Mat            outA;
2408   PetscErrorCode ierr;
2409   PetscBool      row_identity,col_identity;
2410 
2411   PetscFunctionBegin;
2412   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2413   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2414   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2415   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2416 
2417   outA            = inA;
2418   inA->factortype = MAT_FACTOR_LU;
2419 
2420   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2421 
2422   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2423   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2424   a->row = row;
2425   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2426   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2427   a->col = col;
2428 
2429   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2430   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2431    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2432   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2433 
2434   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2435   if (!a->solve_work) {
2436     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2437     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2438   }
2439   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 EXTERN_C_BEGIN
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2446 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2447 {
2448   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2449   PetscInt    i,nz,mbs;
2450 
2451   PetscFunctionBegin;
2452   nz  = baij->maxnz;
2453   mbs = baij->mbs;
2454   for (i=0; i<nz; i++) {
2455     baij->j[i] = indices[i];
2456   }
2457   baij->nz = nz;
2458   for (i=0; i<mbs; i++) {
2459     baij->ilen[i] = baij->imax[i];
2460   }
2461   PetscFunctionReturn(0);
2462 }
2463 EXTERN_C_END
2464 
2465 #undef __FUNCT__
2466 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2467 /*@
2468     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2469        in the matrix.
2470 
2471   Input Parameters:
2472 +  mat - the SeqBAIJ matrix
2473 -  indices - the column indices
2474 
2475   Level: advanced
2476 
2477   Notes:
2478     This can be called if you have precomputed the nonzero structure of the
2479   matrix and want to provide it to the matrix object to improve the performance
2480   of the MatSetValues() operation.
2481 
2482     You MUST have set the correct numbers of nonzeros per row in the call to
2483   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2484 
2485     MUST be called before any calls to MatSetValues();
2486 
2487 @*/
2488 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2489 {
2490   PetscErrorCode ierr;
2491 
2492   PetscFunctionBegin;
2493   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2494   PetscValidPointer(indices,2);
2495   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2501 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2502 {
2503   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2504   PetscErrorCode ierr;
2505   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2506   PetscReal      atmp;
2507   PetscScalar    *x,zero = 0.0;
2508   MatScalar      *aa;
2509   PetscInt       ncols,brow,krow,kcol;
2510 
2511   PetscFunctionBegin;
2512   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2513   bs   = A->rmap->bs;
2514   aa   = a->a;
2515   ai   = a->i;
2516   aj   = a->j;
2517   mbs  = a->mbs;
2518 
2519   ierr = VecSet(v,zero);CHKERRQ(ierr);
2520   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2521   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2522   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2523   for (i=0; i<mbs; i++) {
2524     ncols = ai[1] - ai[0]; ai++;
2525     brow  = bs*i;
2526     for (j=0; j<ncols; j++) {
2527       for (kcol=0; kcol<bs; kcol++) {
2528         for (krow=0; krow<bs; krow++) {
2529           atmp = PetscAbsScalar(*aa);aa++;
2530           row = brow + krow;    /* row index */
2531           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2532           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2533         }
2534       }
2535       aj++;
2536     }
2537   }
2538   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2539   PetscFunctionReturn(0);
2540 }
2541 
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatCopy_SeqBAIJ"
2544 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2545 {
2546   PetscErrorCode ierr;
2547 
2548   PetscFunctionBegin;
2549   /* If the two matrices have the same copy implementation, use fast copy. */
2550   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2551     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2552     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2553     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2554 
2555     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2556     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2557     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2558   } else {
2559     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2560   }
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2566 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2567 {
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2572   PetscFunctionReturn(0);
2573 }
2574 
2575 #undef __FUNCT__
2576 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2577 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2578 {
2579   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2580 
2581   PetscFunctionBegin;
2582   *array = a->a;
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2588 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2589 {
2590   PetscFunctionBegin;
2591   PetscFunctionReturn(0);
2592 }
2593 
2594 #undef __FUNCT__
2595 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2596 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2597 {
2598   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2599   PetscErrorCode ierr;
2600   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2601   PetscBLASInt   one=1;
2602 
2603   PetscFunctionBegin;
2604   if (str == SAME_NONZERO_PATTERN) {
2605     PetscScalar  alpha = a;
2606     PetscBLASInt bnz;
2607     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2608     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2609   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2610     if (y->xtoy && y->XtoY != X) {
2611       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2612       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2613     }
2614     if (!y->xtoy) { /* get xtoy */
2615       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2616       y->XtoY = X;
2617       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2618     }
2619     for (i=0; i<x->nz; i++) {
2620       j = 0;
2621       while (j < bs2) {
2622         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2623         j++;
2624       }
2625     }
2626     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2627   } else {
2628     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2629   }
2630   PetscFunctionReturn(0);
2631 }
2632 
2633 #undef __FUNCT__
2634 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2635 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2636 {
2637   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2638   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2639   MatScalar      *aa = a->a;
2640 
2641   PetscFunctionBegin;
2642   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2643   PetscFunctionReturn(0);
2644 }
2645 
2646 #undef __FUNCT__
2647 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2648 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2649 {
2650   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2651   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2652   MatScalar      *aa = a->a;
2653 
2654   PetscFunctionBegin;
2655   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2656   PetscFunctionReturn(0);
2657 }
2658 
2659 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2663 /*
2664     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2665 */
2666 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2667 {
2668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2669   PetscErrorCode ierr;
2670   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2671   PetscInt       nz = a->i[m],row,*jj,mr,col;
2672 
2673   PetscFunctionBegin;
2674   *nn = n;
2675   if (!ia) PetscFunctionReturn(0);
2676   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2677   else {
2678     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2679     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2680     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2681     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2682     jj = a->j;
2683     for (i=0; i<nz; i++) {
2684       collengths[jj[i]]++;
2685     }
2686     cia[0] = oshift;
2687     for (i=0; i<n; i++) {
2688       cia[i+1] = cia[i] + collengths[i];
2689     }
2690     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2691     jj   = a->j;
2692     for (row=0; row<m; row++) {
2693       mr = a->i[row+1] - a->i[row];
2694       for (i=0; i<mr; i++) {
2695         col = *jj++;
2696         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2697       }
2698     }
2699     ierr = PetscFree(collengths);CHKERRQ(ierr);
2700     *ia = cia; *ja = cja;
2701   }
2702   PetscFunctionReturn(0);
2703 }
2704 
2705 #undef __FUNCT__
2706 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2707 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2708 {
2709   PetscErrorCode ierr;
2710 
2711   PetscFunctionBegin;
2712   if (!ia) PetscFunctionReturn(0);
2713   ierr = PetscFree(*ia);CHKERRQ(ierr);
2714   ierr = PetscFree(*ja);CHKERRQ(ierr);
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2720 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2721 {
2722   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2723   PetscErrorCode ierr;
2724   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2725   PetscScalar    dx,*y,*xx,*w3_array;
2726   PetscScalar    *vscale_array;
2727   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2728   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2729   void           *fctx = coloring->fctx;
2730   PetscBool      flg = PETSC_FALSE;
2731   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2732   Vec            x1_tmp;
2733 
2734   PetscFunctionBegin;
2735   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2736   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2737   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2738   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2739 
2740   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2741   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2742   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2743   if (flg) {
2744     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2745   } else {
2746     PetscBool  assembled;
2747     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2748     if (assembled) {
2749       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2750     }
2751   }
2752 
2753   x1_tmp = x1;
2754   if (!coloring->vscale) {
2755     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2756   }
2757 
2758   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2759     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2760   }
2761   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2762 
2763   /* Set w1 = F(x1) */
2764   if (!coloring->fset) {
2765     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2766     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2767     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2768   } else {
2769     coloring->fset = PETSC_FALSE;
2770   }
2771 
2772   if (!coloring->w3) {
2773     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2774     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2775   }
2776   w3 = coloring->w3;
2777 
2778     CHKMEMQ;
2779     /* Compute all the local scale factors, including ghost points */
2780   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2781   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2782   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2783   if (ctype == IS_COLORING_GHOSTED) {
2784     col_start = 0; col_end = N;
2785   } else if (ctype == IS_COLORING_GLOBAL) {
2786     xx = xx - start;
2787     vscale_array = vscale_array - start;
2788     col_start = start; col_end = N + start;
2789   }    CHKMEMQ;
2790   for (col=col_start; col<col_end; col++) {
2791     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2792     if (coloring->htype[0] == 'w') {
2793       dx = 1.0 + unorm;
2794     } else {
2795       dx  = xx[col];
2796     }
2797     if (dx == (PetscScalar)0.0) dx = 1.0;
2798     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2799     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2800     dx               *= epsilon;
2801     vscale_array[col] = (PetscScalar)1.0/dx;
2802   }     CHKMEMQ;
2803   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2804   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2805   if (ctype == IS_COLORING_GLOBAL) {
2806     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2807     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2808   }
2809   CHKMEMQ;
2810   if (coloring->vscaleforrow) {
2811     vscaleforrow = coloring->vscaleforrow;
2812   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2813 
2814   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2815   /*
2816     Loop over each color
2817   */
2818   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2819   for (k=0; k<coloring->ncolors; k++) {
2820     coloring->currentcolor = k;
2821     for (i=0; i<bs; i++) {
2822       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2823       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2824       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2825       /*
2826         Loop over each column associated with color
2827         adding the perturbation to the vector w3.
2828       */
2829       for (l=0; l<coloring->ncolumns[k]; l++) {
2830         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2831         if (coloring->htype[0] == 'w') {
2832           dx = 1.0 + unorm;
2833         } else {
2834           dx  = xx[col];
2835         }
2836         if (dx == (PetscScalar)0.0) dx = 1.0;
2837         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2838         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2839         dx            *= epsilon;
2840         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2841         w3_array[col] += dx;
2842       }
2843       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2844       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2845 
2846       /*
2847         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2848         w2 = F(x1 + dx) - F(x1)
2849       */
2850       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2851       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2852       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2853       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2854 
2855       /*
2856         Loop over rows of vector, putting results into Jacobian matrix
2857       */
2858       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2859       for (l=0; l<coloring->nrows[k]; l++) {
2860         row    = bs*coloring->rows[k][l];             /* local row index */
2861         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2862         for (j=0; j<bs; j++) {
2863           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2864           srows[j]  = row + start + j;
2865         }
2866         ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2867       }
2868       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2869     }
2870   } /* endof for each color */
2871   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2872   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2873   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2874   ierr = PetscFree(srows);CHKERRQ(ierr);
2875 
2876   coloring->currentcolor = -1;
2877   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2878   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2879   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2880   PetscFunctionReturn(0);
2881 }
2882 
2883 /* -------------------------------------------------------------------*/
2884 static struct _MatOps MatOps_Values = {
2885         MatSetValues_SeqBAIJ,
2886         MatGetRow_SeqBAIJ,
2887         MatRestoreRow_SeqBAIJ,
2888         MatMult_SeqBAIJ_N,
2889 /* 4*/  MatMultAdd_SeqBAIJ_N,
2890         MatMultTranspose_SeqBAIJ,
2891         MatMultTransposeAdd_SeqBAIJ,
2892         0,
2893         0,
2894         0,
2895 /* 10*/ 0,
2896         MatLUFactor_SeqBAIJ,
2897         0,
2898         0,
2899         MatTranspose_SeqBAIJ,
2900 /* 15*/ MatGetInfo_SeqBAIJ,
2901         MatEqual_SeqBAIJ,
2902         MatGetDiagonal_SeqBAIJ,
2903         MatDiagonalScale_SeqBAIJ,
2904         MatNorm_SeqBAIJ,
2905 /* 20*/ 0,
2906         MatAssemblyEnd_SeqBAIJ,
2907         MatSetOption_SeqBAIJ,
2908         MatZeroEntries_SeqBAIJ,
2909 /* 24*/ MatZeroRows_SeqBAIJ,
2910         0,
2911         0,
2912         0,
2913         0,
2914 /* 29*/ MatSetUp_SeqBAIJ,
2915         0,
2916         0,
2917         0,
2918         0,
2919 /* 34*/ MatDuplicate_SeqBAIJ,
2920         0,
2921         0,
2922         MatILUFactor_SeqBAIJ,
2923         0,
2924 /* 39*/ MatAXPY_SeqBAIJ,
2925         MatGetSubMatrices_SeqBAIJ,
2926         MatIncreaseOverlap_SeqBAIJ,
2927         MatGetValues_SeqBAIJ,
2928         MatCopy_SeqBAIJ,
2929 /* 44*/ 0,
2930         MatScale_SeqBAIJ,
2931         0,
2932         0,
2933         MatZeroRowsColumns_SeqBAIJ,
2934 /* 49*/ 0,
2935         MatGetRowIJ_SeqBAIJ,
2936         MatRestoreRowIJ_SeqBAIJ,
2937         MatGetColumnIJ_SeqBAIJ,
2938         MatRestoreColumnIJ_SeqBAIJ,
2939 /* 54*/ MatFDColoringCreate_SeqAIJ,
2940         0,
2941         0,
2942         0,
2943         MatSetValuesBlocked_SeqBAIJ,
2944 /* 59*/ MatGetSubMatrix_SeqBAIJ,
2945         MatDestroy_SeqBAIJ,
2946         MatView_SeqBAIJ,
2947         0,
2948         0,
2949 /* 64*/ 0,
2950         0,
2951         0,
2952         0,
2953         0,
2954 /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2955         0,
2956         MatConvert_Basic,
2957         0,
2958         0,
2959 /* 74*/ 0,
2960         MatFDColoringApply_BAIJ,
2961         0,
2962         0,
2963         0,
2964 /* 79*/ 0,
2965         0,
2966         0,
2967         0,
2968         MatLoad_SeqBAIJ,
2969 /* 84*/ 0,
2970         0,
2971         0,
2972         0,
2973         0,
2974 /* 89*/ 0,
2975         0,
2976         0,
2977         0,
2978         0,
2979 /* 94*/ 0,
2980         0,
2981         0,
2982         0,
2983         0,
2984 /* 99*/ 0,
2985         0,
2986         0,
2987         0,
2988         0,
2989 /*104*/ 0,
2990         MatRealPart_SeqBAIJ,
2991         MatImaginaryPart_SeqBAIJ,
2992         0,
2993         0,
2994 /*109*/ 0,
2995         0,
2996         0,
2997         0,
2998         MatMissingDiagonal_SeqBAIJ,
2999 /*114*/ 0,
3000         0,
3001         0,
3002         0,
3003         0,
3004 /*119*/ 0,
3005         0,
3006         MatMultHermitianTranspose_SeqBAIJ,
3007         MatMultHermitianTransposeAdd_SeqBAIJ,
3008         0,
3009 /*124*/ 0,
3010         0,
3011         MatInvertBlockDiagonal_SeqBAIJ
3012 };
3013 
3014 EXTERN_C_BEGIN
3015 #undef __FUNCT__
3016 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3017 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3018 {
3019   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3020   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3025 
3026   /* allocate space for values if not already there */
3027   if (!aij->saved_values) {
3028     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3029     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3030   }
3031 
3032   /* copy values over */
3033   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3034   PetscFunctionReturn(0);
3035 }
3036 EXTERN_C_END
3037 
3038 EXTERN_C_BEGIN
3039 #undef __FUNCT__
3040 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3041 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3042 {
3043   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3044   PetscErrorCode ierr;
3045   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3046 
3047   PetscFunctionBegin;
3048   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3049   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3050 
3051   /* copy values over */
3052   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3053   PetscFunctionReturn(0);
3054 }
3055 EXTERN_C_END
3056 
3057 EXTERN_C_BEGIN
3058 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3059 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3060 EXTERN_C_END
3061 
3062 EXTERN_C_BEGIN
3063 #undef __FUNCT__
3064 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3065 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3066 {
3067   Mat_SeqBAIJ    *b;
3068   PetscErrorCode ierr;
3069   PetscInt       i,mbs,nbs,bs2;
3070   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3071 
3072   PetscFunctionBegin;
3073   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3074   if (nz == MAT_SKIP_ALLOCATION) {
3075     skipallocation = PETSC_TRUE;
3076     nz             = 0;
3077   }
3078 
3079   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3080   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3081   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3082   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3083   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3084 
3085   B->preallocated = PETSC_TRUE;
3086 
3087   mbs  = B->rmap->n/bs;
3088   nbs  = B->cmap->n/bs;
3089   bs2  = bs*bs;
3090 
3091   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3092 
3093   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3094   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3095   if (nnz) {
3096     for (i=0; i<mbs; i++) {
3097       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3098       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3099     }
3100   }
3101 
3102   b       = (Mat_SeqBAIJ*)B->data;
3103   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3104     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3105   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3106 
3107   if (!flg) {
3108     switch (bs) {
3109     case 1:
3110       B->ops->mult            = MatMult_SeqBAIJ_1;
3111       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3112       B->ops->sor             = MatSOR_SeqBAIJ_1;
3113       break;
3114     case 2:
3115       B->ops->mult            = MatMult_SeqBAIJ_2;
3116       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3117       B->ops->sor             = MatSOR_SeqBAIJ_2;
3118       break;
3119     case 3:
3120       B->ops->mult            = MatMult_SeqBAIJ_3;
3121       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3122       B->ops->sor             = MatSOR_SeqBAIJ_3;
3123       break;
3124     case 4:
3125       B->ops->mult            = MatMult_SeqBAIJ_4;
3126       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3127       B->ops->sor             = MatSOR_SeqBAIJ_4;
3128       break;
3129     case 5:
3130       B->ops->mult            = MatMult_SeqBAIJ_5;
3131       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3132       B->ops->sor             = MatSOR_SeqBAIJ_5;
3133       break;
3134     case 6:
3135       B->ops->mult            = MatMult_SeqBAIJ_6;
3136       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3137       B->ops->sor             = MatSOR_SeqBAIJ_6;
3138       break;
3139     case 7:
3140       B->ops->mult            = MatMult_SeqBAIJ_7;
3141       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3142       B->ops->sor             = MatSOR_SeqBAIJ_7;
3143       break;
3144     case 15:
3145       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3146       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3147       B->ops->sor             = MatSOR_SeqBAIJ_N;
3148       break;
3149     default:
3150       B->ops->mult            = MatMult_SeqBAIJ_N;
3151       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3152       B->ops->sor             = MatSOR_SeqBAIJ_N;
3153       break;
3154     }
3155   }
3156   b->mbs       = mbs;
3157   b->nbs       = nbs;
3158   if (!skipallocation) {
3159     if (!b->imax) {
3160       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3161       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3162       b->free_imax_ilen = PETSC_TRUE;
3163     }
3164     /* b->ilen will count nonzeros in each block row so far. */
3165     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3166     if (!nnz) {
3167       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3168       else if (nz < 0) nz = 1;
3169       for (i=0; i<mbs; i++) b->imax[i] = nz;
3170       nz = nz*mbs;
3171     } else {
3172       nz = 0;
3173       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3174     }
3175 
3176     /* allocate the matrix space */
3177     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3178     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3179     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3180     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3181     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3182     b->singlemalloc = PETSC_TRUE;
3183     b->i[0] = 0;
3184     for (i=1; i<mbs+1; i++) {
3185       b->i[i] = b->i[i-1] + b->imax[i-1];
3186     }
3187     b->free_a     = PETSC_TRUE;
3188     b->free_ij    = PETSC_TRUE;
3189   } else {
3190     b->free_a     = PETSC_FALSE;
3191     b->free_ij    = PETSC_FALSE;
3192   }
3193 
3194   b->bs2              = bs2;
3195   b->mbs              = mbs;
3196   b->nz               = 0;
3197   b->maxnz            = nz;
3198   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3199   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3200   PetscFunctionReturn(0);
3201 }
3202 EXTERN_C_END
3203 
3204 EXTERN_C_BEGIN
3205 #undef __FUNCT__
3206 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3207 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3208 {
3209   PetscInt       i,m,nz,nz_max=0,*nnz;
3210   PetscScalar    *values=0;
3211   PetscErrorCode ierr;
3212 
3213   PetscFunctionBegin;
3214   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3215   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3216   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3217   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3218   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3219   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3220   m = B->rmap->n/bs;
3221 
3222   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3223   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3224   for (i=0; i<m; i++) {
3225     nz = ii[i+1]- ii[i];
3226     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3227     nz_max = PetscMax(nz_max, nz);
3228     nnz[i] = nz;
3229   }
3230   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3231   ierr = PetscFree(nnz);CHKERRQ(ierr);
3232 
3233   values = (PetscScalar*)V;
3234   if (!values) {
3235     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3236     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3237   }
3238   for (i=0; i<m; i++) {
3239     PetscInt          ncols  = ii[i+1] - ii[i];
3240     const PetscInt    *icols = jj + ii[i];
3241     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3242     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3243   }
3244   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3245   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3246   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3247   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3248   PetscFunctionReturn(0);
3249 }
3250 EXTERN_C_END
3251 
3252 
3253 EXTERN_C_BEGIN
3254 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3255 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3256 #if defined(PETSC_HAVE_MUMPS)
3257 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3258 #endif
3259 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3260 EXTERN_C_END
3261 
3262 /*MC
3263    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3264    block sparse compressed row format.
3265 
3266    Options Database Keys:
3267 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3268 
3269   Level: beginner
3270 
3271 .seealso: MatCreateSeqBAIJ()
3272 M*/
3273 
3274 EXTERN_C_BEGIN
3275 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3276 EXTERN_C_END
3277 
3278 EXTERN_C_BEGIN
3279 #undef __FUNCT__
3280 #define __FUNCT__ "MatCreate_SeqBAIJ"
3281 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3282 {
3283   PetscErrorCode ierr;
3284   PetscMPIInt    size;
3285   Mat_SeqBAIJ    *b;
3286 
3287   PetscFunctionBegin;
3288   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3289   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3290 
3291   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3292   B->data = (void*)b;
3293   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3294   b->row                   = 0;
3295   b->col                   = 0;
3296   b->icol                  = 0;
3297   b->reallocs              = 0;
3298   b->saved_values          = 0;
3299 
3300   b->roworiented           = PETSC_TRUE;
3301   b->nonew                 = 0;
3302   b->diag                  = 0;
3303   b->solve_work            = 0;
3304   b->mult_work             = 0;
3305   B->spptr                 = 0;
3306   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3307   b->keepnonzeropattern    = PETSC_FALSE;
3308   b->xtoy                  = 0;
3309   b->XtoY                  = 0;
3310   B->same_nonzero          = PETSC_FALSE;
3311 
3312   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3313                                      "MatGetFactorAvailable_seqbaij_petsc",
3314                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3315   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3316                                      "MatGetFactor_seqbaij_petsc",
3317                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3318   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3319                                      "MatGetFactor_seqbaij_bstrm",
3320                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3321 #if defined(PETSC_HAVE_MUMPS)
3322   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3323 #endif
3324   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3325                                      "MatInvertBlockDiagonal_SeqBAIJ",
3326                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3327   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3328                                      "MatStoreValues_SeqBAIJ",
3329                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3330   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3331                                      "MatRetrieveValues_SeqBAIJ",
3332                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3333   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3334                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3335                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3336   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3337                                      "MatConvert_SeqBAIJ_SeqAIJ",
3338                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3340                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3341                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3342   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3343                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3344                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3345   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3346                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3347                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3348   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3349                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3350                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3351   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3352                                      "MatIsTranspose_SeqBAIJ",
3353                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3354   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3355   PetscFunctionReturn(0);
3356 }
3357 EXTERN_C_END
3358 
3359 #undef __FUNCT__
3360 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3361 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3362 {
3363   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3364   PetscErrorCode ierr;
3365   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3366 
3367   PetscFunctionBegin;
3368   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3369 
3370   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3371     c->imax = a->imax;
3372     c->ilen = a->ilen;
3373     c->free_imax_ilen = PETSC_FALSE;
3374   } else {
3375     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3376     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3377     for (i=0; i<mbs; i++) {
3378       c->imax[i] = a->imax[i];
3379       c->ilen[i] = a->ilen[i];
3380     }
3381     c->free_imax_ilen = PETSC_TRUE;
3382   }
3383 
3384   /* allocate the matrix space */
3385   if (mallocmatspace) {
3386     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3387       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3388       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3389       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3390       c->i            = a->i;
3391       c->j            = a->j;
3392       c->singlemalloc = PETSC_FALSE;
3393       c->free_a       = PETSC_TRUE;
3394       c->free_ij      = PETSC_FALSE;
3395       c->parent       = A;
3396       C->preallocated = PETSC_TRUE;
3397       C->assembled    = PETSC_TRUE;
3398       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3399       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3400       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3401     } else {
3402       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3403       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3404       c->singlemalloc = PETSC_TRUE;
3405       c->free_a       = PETSC_TRUE;
3406       c->free_ij      = PETSC_TRUE;
3407       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3408       if (mbs > 0) {
3409         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3410         if (cpvalues == MAT_COPY_VALUES) {
3411           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3412         } else {
3413           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3414         }
3415       }
3416       C->preallocated = PETSC_TRUE;
3417       C->assembled    = PETSC_TRUE;
3418     }
3419   }
3420 
3421   c->roworiented = a->roworiented;
3422   c->nonew       = a->nonew;
3423   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3424   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3425   c->bs2         = a->bs2;
3426   c->mbs         = a->mbs;
3427   c->nbs         = a->nbs;
3428 
3429   if (a->diag) {
3430     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3431       c->diag      = a->diag;
3432       c->free_diag = PETSC_FALSE;
3433     } else {
3434       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3435       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3436       for (i=0; i<mbs; i++) {
3437         c->diag[i] = a->diag[i];
3438       }
3439       c->free_diag = PETSC_TRUE;
3440     }
3441   } else c->diag        = 0;
3442   c->nz                 = a->nz;
3443   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3444   c->solve_work         = 0;
3445   c->mult_work          = 0;
3446 
3447   c->compressedrow.use     = a->compressedrow.use;
3448   c->compressedrow.nrows   = a->compressedrow.nrows;
3449   c->compressedrow.check   = a->compressedrow.check;
3450   if (a->compressedrow.use) {
3451     i = a->compressedrow.nrows;
3452     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3453     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3454     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3455     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3456   } else {
3457     c->compressedrow.use    = PETSC_FALSE;
3458     c->compressedrow.i      = PETSC_NULL;
3459     c->compressedrow.rindex = PETSC_NULL;
3460   }
3461   C->same_nonzero = A->same_nonzero;
3462   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3463   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 #undef __FUNCT__
3468 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3469 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3470 {
3471   PetscErrorCode ierr;
3472 
3473   PetscFunctionBegin;
3474   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3475   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3476   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3477   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3478   PetscFunctionReturn(0);
3479 }
3480 
3481 #undef __FUNCT__
3482 #define __FUNCT__ "MatLoad_SeqBAIJ"
3483 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3484 {
3485   Mat_SeqBAIJ    *a;
3486   PetscErrorCode ierr;
3487   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3488   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3489   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3490   PetscInt       *masked,nmask,tmp,bs2,ishift;
3491   PetscMPIInt    size;
3492   int            fd;
3493   PetscScalar    *aa;
3494   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3495 
3496   PetscFunctionBegin;
3497   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3498   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3499   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3500   bs2  = bs*bs;
3501 
3502   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3503   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3504   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3505   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3506   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3507   M = header[1]; N = header[2]; nz = header[3];
3508 
3509   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3510   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3511 
3512   /*
3513      This code adds extra rows to make sure the number of rows is
3514     divisible by the blocksize
3515   */
3516   mbs        = M/bs;
3517   extra_rows = bs - M + bs*(mbs);
3518   if (extra_rows == bs) extra_rows = 0;
3519   else                  mbs++;
3520   if (extra_rows) {
3521     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3522   }
3523 
3524   /* Set global sizes if not already set */
3525   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3526     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3527   } else { /* Check if the matrix global sizes are correct */
3528     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3529     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3530       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3531     }
3532     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3533   }
3534 
3535   /* read in row lengths */
3536   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3537   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3538   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3539 
3540   /* read in column indices */
3541   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3542   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3543   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3544 
3545   /* loop over row lengths determining block row lengths */
3546   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3547   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3548   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3549   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3550   rowcount = 0;
3551   nzcount = 0;
3552   for (i=0; i<mbs; i++) {
3553     nmask = 0;
3554     for (j=0; j<bs; j++) {
3555       kmax = rowlengths[rowcount];
3556       for (k=0; k<kmax; k++) {
3557         tmp = jj[nzcount++]/bs;
3558         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3559       }
3560       rowcount++;
3561     }
3562     browlengths[i] += nmask;
3563     /* zero out the mask elements we set */
3564     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3565   }
3566 
3567   /* Do preallocation  */
3568   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3569   a = (Mat_SeqBAIJ*)newmat->data;
3570 
3571   /* set matrix "i" values */
3572   a->i[0] = 0;
3573   for (i=1; i<= mbs; i++) {
3574     a->i[i]      = a->i[i-1] + browlengths[i-1];
3575     a->ilen[i-1] = browlengths[i-1];
3576   }
3577   a->nz         = 0;
3578   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3579 
3580   /* read in nonzero values */
3581   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3582   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3583   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3584 
3585   /* set "a" and "j" values into matrix */
3586   nzcount = 0; jcount = 0;
3587   for (i=0; i<mbs; i++) {
3588     nzcountb = nzcount;
3589     nmask    = 0;
3590     for (j=0; j<bs; j++) {
3591       kmax = rowlengths[i*bs+j];
3592       for (k=0; k<kmax; k++) {
3593         tmp = jj[nzcount++]/bs;
3594         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3595       }
3596     }
3597     /* sort the masked values */
3598     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3599 
3600     /* set "j" values into matrix */
3601     maskcount = 1;
3602     for (j=0; j<nmask; j++) {
3603       a->j[jcount++]  = masked[j];
3604       mask[masked[j]] = maskcount++;
3605     }
3606     /* set "a" values into matrix */
3607     ishift = bs2*a->i[i];
3608     for (j=0; j<bs; j++) {
3609       kmax = rowlengths[i*bs+j];
3610       for (k=0; k<kmax; k++) {
3611         tmp       = jj[nzcountb]/bs ;
3612         block     = mask[tmp] - 1;
3613         point     = jj[nzcountb] - bs*tmp;
3614         idx       = ishift + bs2*block + j + bs*point;
3615         a->a[idx] = (MatScalar)aa[nzcountb++];
3616       }
3617     }
3618     /* zero out the mask elements we set */
3619     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3620   }
3621   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3622 
3623   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3624   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3625   ierr = PetscFree(aa);CHKERRQ(ierr);
3626   ierr = PetscFree(jj);CHKERRQ(ierr);
3627   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3628 
3629   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3630   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   PetscFunctionReturn(0);
3632 }
3633 
3634 #undef __FUNCT__
3635 #define __FUNCT__ "MatCreateSeqBAIJ"
3636 /*@C
3637    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3638    compressed row) format.  For good matrix assembly performance the
3639    user should preallocate the matrix storage by setting the parameter nz
3640    (or the array nnz).  By setting these parameters accurately, performance
3641    during matrix assembly can be increased by more than a factor of 50.
3642 
3643    Collective on MPI_Comm
3644 
3645    Input Parameters:
3646 +  comm - MPI communicator, set to PETSC_COMM_SELF
3647 .  bs - size of block
3648 .  m - number of rows
3649 .  n - number of columns
3650 .  nz - number of nonzero blocks  per block row (same for all rows)
3651 -  nnz - array containing the number of nonzero blocks in the various block rows
3652          (possibly different for each block row) or PETSC_NULL
3653 
3654    Output Parameter:
3655 .  A - the matrix
3656 
3657    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3658    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3659    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3660 
3661    Options Database Keys:
3662 .   -mat_no_unroll - uses code that does not unroll the loops in the
3663                      block calculations (much slower)
3664 .    -mat_block_size - size of the blocks to use
3665 
3666    Level: intermediate
3667 
3668    Notes:
3669    The number of rows and columns must be divisible by blocksize.
3670 
3671    If the nnz parameter is given then the nz parameter is ignored
3672 
3673    A nonzero block is any block that as 1 or more nonzeros in it
3674 
3675    The block AIJ format is fully compatible with standard Fortran 77
3676    storage.  That is, the stored row and column indices can begin at
3677    either one (as in Fortran) or zero.  See the users' manual for details.
3678 
3679    Specify the preallocated storage with either nz or nnz (not both).
3680    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3681    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3682    matrices.
3683 
3684 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3685 @*/
3686 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3687 {
3688   PetscErrorCode ierr;
3689 
3690   PetscFunctionBegin;
3691   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3692   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3693   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3694   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3695   PetscFunctionReturn(0);
3696 }
3697 
3698 #undef __FUNCT__
3699 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3700 /*@C
3701    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3702    per row in the matrix. For good matrix assembly performance the
3703    user should preallocate the matrix storage by setting the parameter nz
3704    (or the array nnz).  By setting these parameters accurately, performance
3705    during matrix assembly can be increased by more than a factor of 50.
3706 
3707    Collective on MPI_Comm
3708 
3709    Input Parameters:
3710 +  A - the matrix
3711 .  bs - size of block
3712 .  nz - number of block nonzeros per block row (same for all rows)
3713 -  nnz - array containing the number of block nonzeros in the various block rows
3714          (possibly different for each block row) or PETSC_NULL
3715 
3716    Options Database Keys:
3717 .   -mat_no_unroll - uses code that does not unroll the loops in the
3718                      block calculations (much slower)
3719 .    -mat_block_size - size of the blocks to use
3720 
3721    Level: intermediate
3722 
3723    Notes:
3724    If the nnz parameter is given then the nz parameter is ignored
3725 
3726    You can call MatGetInfo() to get information on how effective the preallocation was;
3727    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3728    You can also run with the option -info and look for messages with the string
3729    malloc in them to see if additional memory allocation was needed.
3730 
3731    The block AIJ format is fully compatible with standard Fortran 77
3732    storage.  That is, the stored row and column indices can begin at
3733    either one (as in Fortran) or zero.  See the users' manual for details.
3734 
3735    Specify the preallocated storage with either nz or nnz (not both).
3736    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3737    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3738 
3739 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3740 @*/
3741 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3742 {
3743   PetscErrorCode ierr;
3744 
3745   PetscFunctionBegin;
3746   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3747   PetscValidType(B,1);
3748   PetscValidLogicalCollectiveInt(B,bs,2);
3749   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3750   PetscFunctionReturn(0);
3751 }
3752 
3753 #undef __FUNCT__
3754 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3755 /*@C
3756    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3757    (the default sequential PETSc format).
3758 
3759    Collective on MPI_Comm
3760 
3761    Input Parameters:
3762 +  A - the matrix
3763 .  i - the indices into j for the start of each local row (starts with zero)
3764 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3765 -  v - optional values in the matrix
3766 
3767    Level: developer
3768 
3769 .keywords: matrix, aij, compressed row, sparse
3770 
3771 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3772 @*/
3773 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3774 {
3775   PetscErrorCode ierr;
3776 
3777   PetscFunctionBegin;
3778   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3779   PetscValidType(B,1);
3780   PetscValidLogicalCollectiveInt(B,bs,2);
3781   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3782   PetscFunctionReturn(0);
3783 }
3784 
3785 
3786 #undef __FUNCT__
3787 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3788 /*@
3789      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3790 
3791      Collective on MPI_Comm
3792 
3793    Input Parameters:
3794 +  comm - must be an MPI communicator of size 1
3795 .  bs - size of block
3796 .  m - number of rows
3797 .  n - number of columns
3798 .  i - row indices
3799 .  j - column indices
3800 -  a - matrix values
3801 
3802    Output Parameter:
3803 .  mat - the matrix
3804 
3805    Level: advanced
3806 
3807    Notes:
3808        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3809     once the matrix is destroyed
3810 
3811        You cannot set new nonzero locations into this matrix, that will generate an error.
3812 
3813        The i and j indices are 0 based
3814 
3815        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3816 
3817 
3818 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3819 
3820 @*/
3821 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3822 {
3823   PetscErrorCode ierr;
3824   PetscInt       ii;
3825   Mat_SeqBAIJ    *baij;
3826 
3827   PetscFunctionBegin;
3828   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3829   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3830 
3831   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3832   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3833   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3834   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3835   baij = (Mat_SeqBAIJ*)(*mat)->data;
3836   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3837   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3838 
3839   baij->i = i;
3840   baij->j = j;
3841   baij->a = a;
3842   baij->singlemalloc = PETSC_FALSE;
3843   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3844   baij->free_a       = PETSC_FALSE;
3845   baij->free_ij      = PETSC_FALSE;
3846 
3847   for (ii=0; ii<m; ii++) {
3848     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3849 #if defined(PETSC_USE_DEBUG)
3850     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3851 #endif
3852   }
3853 #if defined(PETSC_USE_DEBUG)
3854   for (ii=0; ii<baij->i[m]; ii++) {
3855     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3856     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3857   }
3858 #endif
3859 
3860   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3861   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3862   PetscFunctionReturn(0);
3863 }
3864