xref: /petsc/src/mat/impls/baij/seq/baij.c (revision a6dfd86ebdf75fa024070af26d51b62fd16650a3)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <../src/mat/blockinvert.h>
9 
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values)*values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc(2*bs2*mbs*sizeof(PetscScalar),&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory(A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag    = a->idiag;
33   mdiag   = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37     case 1:
38       for (i=0; i<mbs; i++) {
39         odiag = v + 1*diag_offset[i];
40         diag[0]  = odiag[0];
41         mdiag[0] = odiag[0];
42         diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43         diag    += 1;
44         mdiag   += 1;
45       }
46       break;
47     case 2:
48       for (i=0; i<mbs; i++) {
49         odiag   = v + 4*diag_offset[i];
50         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52         ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53         diag    += 4;
54         mdiag   += 4;
55       }
56       break;
57     case 3:
58       for (i=0; i<mbs; i++) {
59         odiag    = v + 9*diag_offset[i];
60         diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61         diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62         diag[8]  = odiag[8];
63         mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64         mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65         mdiag[8] = odiag[8];
66         ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67         diag    += 9;
68         mdiag   += 9;
69       }
70       break;
71     case 4:
72       for (i=0; i<mbs; i++) {
73         odiag  = v + 16*diag_offset[i];
74         ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75         ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76         ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77         diag  += 16;
78         mdiag += 16;
79       }
80       break;
81     case 5:
82       for (i=0; i<mbs; i++) {
83         odiag = v + 25*diag_offset[i];
84         ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85         ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86         ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87         diag  += 25;
88         mdiag += 25;
89       }
90       break;
91     case 6:
92       for (i=0; i<mbs; i++) {
93         odiag = v + 36*diag_offset[i];
94         ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95         ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96         ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97         diag  += 36;
98         mdiag += 36;
99       }
100       break;
101     case 7:
102       for (i=0; i<mbs; i++) {
103         odiag = v + 49*diag_offset[i];
104         ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105         ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106         ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107         diag  += 49;
108         mdiag += 49;
109       }
110       break;
111     default:
112       ierr = PetscMalloc2(bs,MatScalar,&v_work,bs,PetscInt,&v_pivots);CHKERRQ(ierr);
113       for (i=0; i<mbs; i++) {
114         odiag = v + bs2*diag_offset[i];
115         ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116         ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117         ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118         diag  += bs2;
119         mdiag += bs2;
120       }
121       ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ_1"
129 PetscErrorCode MatSOR_SeqBAIJ_1(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar        *x,x1,s1;
133   const PetscScalar  *b;
134   const MatScalar    *aa = a->a, *idiag,*mdiag,*v;
135   PetscErrorCode     ierr;
136   PetscInt           m = a->mbs,i,i2,nz,j;
137   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
138 
139   PetscFunctionBegin;
140   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
141   its = its*lits;
142   if (its <= 0) SETERRQ2(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
143   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
144   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
145   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
146   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
149 
150   diag  = a->diag;
151   idiag = a->idiag;
152   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
153   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
154 
155   if (flag & SOR_ZERO_INITIAL_GUESS) {
156     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
157       x[0] = b[0]*idiag[0];
158       i2     = 1;
159       idiag += 1;
160       for (i=1; i<m; i++) {
161         v     = aa + ai[i];
162         vi    = aj + ai[i];
163         nz    = diag[i] - ai[i];
164         s1    = b[i2];
165         for (j=0; j<nz; j++) {
166           s1 -= v[j]*x[vi[j]];
167         }
168         x[i2]   = idiag[0]*s1;
169         idiag   += 1;
170         i2      += 1;
171       }
172       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
173       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
174     }
175     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
176         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
177       i2    = 0;
178       mdiag = a->idiag+a->mbs;
179       for (i=0; i<m; i++) {
180         x1      = x[i2];
181         x[i2]   = mdiag[0]*x1;
182         mdiag  += 1;
183         i2     += 1;
184       }
185       ierr = PetscLogFlops(m);CHKERRQ(ierr);
186     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
187       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
188     }
189     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
190       idiag   = a->idiag+a->mbs - 1;
191       i2      = m - 1;
192       x1      = x[i2];
193       x[i2]   = idiag[0]*x1;
194       idiag -= 1;
195       i2    -= 1;
196       for (i=m-2; i>=0; i--) {
197         v     = aa + (diag[i]+1);
198         vi    = aj + diag[i] + 1;
199         nz    = ai[i+1] - diag[i] - 1;
200         s1    = x[i2];
201         for (j=0; j<nz; j++) {
202           s1 -= v[j]*x[vi[j]];
203         }
204         x[i2]   = idiag[0]*s1;
205         idiag   -= 1;
206         i2      -= 1;
207       }
208       ierr = PetscLogFlops(a->nz);CHKERRQ(ierr);
209     }
210   } else {
211     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
212   }
213   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
214   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
215   PetscFunctionReturn(0);
216 }
217 
218 #undef __FUNCT__
219 #define __FUNCT__ "MatSOR_SeqBAIJ_2"
220 PetscErrorCode MatSOR_SeqBAIJ_2(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
221 {
222   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
223   PetscScalar        *x,x1,x2,s1,s2;
224   const PetscScalar  *b;
225   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
226   PetscErrorCode     ierr;
227   PetscInt           m = a->mbs,i,i2,nz,idx,j,it;
228   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
229 
230   PetscFunctionBegin;
231   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
232   its = its*lits;
233   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
234   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
235   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
236   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
237   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
238 
239   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
240 
241   diag  = a->diag;
242   idiag = a->idiag;
243   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
244   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
245 
246   if (flag & SOR_ZERO_INITIAL_GUESS) {
247     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
248       x[0] = b[0]*idiag[0] + b[1]*idiag[2];
249       x[1] = b[0]*idiag[1] + b[1]*idiag[3];
250       i2     = 2;
251       idiag += 4;
252       for (i=1; i<m; i++) {
253         v     = aa + 4*ai[i];
254         vi    = aj + ai[i];
255         nz    = diag[i] - ai[i];
256         s1    = b[i2]; s2 = b[i2+1];
257         for (j=0; j<nz; j++) {
258           idx  = 2*vi[j];
259           it   = 4*j;
260           x1   = x[idx]; x2 = x[1+idx];
261           s1  -= v[it]*x1 + v[it+2]*x2;
262           s2  -= v[it+1]*x1 + v[it+3]*x2;
263         }
264         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
265         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
266         idiag   += 4;
267         i2      += 2;
268       }
269       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
270       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
271     }
272     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
273         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
274       i2    = 0;
275       mdiag = a->idiag+4*a->mbs;
276       for (i=0; i<m; i++) {
277         x1      = x[i2]; x2 = x[i2+1];
278         x[i2]   = mdiag[0]*x1 + mdiag[2]*x2;
279         x[i2+1] = mdiag[1]*x1 + mdiag[3]*x2;
280         mdiag  += 4;
281         i2     += 2;
282       }
283       ierr = PetscLogFlops(6.0*m);CHKERRQ(ierr);
284     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
285       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
286     }
287     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
288       idiag   = a->idiag+4*a->mbs - 4;
289       i2      = 2*m - 2;
290       x1      = x[i2]; x2 = x[i2+1];
291       x[i2]   = idiag[0]*x1 + idiag[2]*x2;
292       x[i2+1] = idiag[1]*x1 + idiag[3]*x2;
293       idiag -= 4;
294       i2    -= 2;
295       for (i=m-2; i>=0; i--) {
296         v     = aa + 4*(diag[i]+1);
297         vi    = aj + diag[i] + 1;
298         nz    = ai[i+1] - diag[i] - 1;
299         s1    = x[i2]; s2 = x[i2+1];
300         for (j=0; j<nz; j++) {
301           idx  = 2*vi[j];
302           it   = 4*j;
303           x1   = x[idx]; x2 = x[1+idx];
304           s1  -= v[it]*x1 + v[it+2]*x2;
305           s2  -= v[it+1]*x1 + v[it+3]*x2;
306         }
307         x[i2]   = idiag[0]*s1 + idiag[2]*s2;
308         x[i2+1] = idiag[1]*s1 + idiag[3]*s2;
309         idiag   -= 4;
310         i2      -= 2;
311       }
312       ierr = PetscLogFlops(4.0*(a->nz));CHKERRQ(ierr);
313     }
314   } else {
315     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
316   }
317   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
318   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
319   PetscFunctionReturn(0);
320 }
321 
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSOR_SeqBAIJ_3"
324 PetscErrorCode MatSOR_SeqBAIJ_3(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
325 {
326   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
327   PetscScalar        *x,x1,x2,x3,s1,s2,s3;
328   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
329   const PetscScalar  *b;
330   PetscErrorCode     ierr;
331   PetscInt           m = a->mbs,i,i2,nz,idx;
332   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
333 
334   PetscFunctionBegin;
335   its = its*lits;
336   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
337   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
338   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
339   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
340   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
341   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
342 
343   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
344 
345   diag  = a->diag;
346   idiag = a->idiag;
347   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
348   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
349 
350   if (flag & SOR_ZERO_INITIAL_GUESS) {
351     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
352       x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
353       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
354       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];
355       i2     = 3;
356       idiag += 9;
357       for (i=1; i<m; i++) {
358         v     = aa + 9*ai[i];
359         vi    = aj + ai[i];
360         nz    = diag[i] - ai[i];
361         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
362         while (nz--) {
363           idx  = 3*(*vi++);
364           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
365           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
366           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
367           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
368           v   += 9;
369         }
370         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
371         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
372         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
373         idiag   += 9;
374         i2      += 3;
375       }
376       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
377       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
378     }
379     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
380         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
381       i2    = 0;
382       mdiag = a->idiag+9*a->mbs;
383       for (i=0; i<m; i++) {
384         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
385         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
386         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
387         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3;
388         mdiag  += 9;
389         i2     += 3;
390       }
391       ierr = PetscLogFlops(15.0*m);CHKERRQ(ierr);
392     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
393       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
394     }
395     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
396       idiag   = a->idiag+9*a->mbs - 9;
397       i2      = 3*m - 3;
398       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
399       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
400       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
401       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;
402       idiag -= 9;
403       i2    -= 3;
404       for (i=m-2; i>=0; i--) {
405         v     = aa + 9*(diag[i]+1);
406         vi    = aj + diag[i] + 1;
407         nz    = ai[i+1] - diag[i] - 1;
408         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
409         while (nz--) {
410           idx  = 3*(*vi++);
411           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
412           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
413           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
414           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
415           v   += 9;
416         }
417         x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
418         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
419         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;
420         idiag   -= 9;
421         i2      -= 3;
422       }
423       ierr = PetscLogFlops(9.0*(a->nz));CHKERRQ(ierr);
424     }
425   } else {
426     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
427   }
428   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
429   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
430   PetscFunctionReturn(0);
431 }
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "MatSOR_SeqBAIJ_4"
435 PetscErrorCode MatSOR_SeqBAIJ_4(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
436 {
437   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
438   PetscScalar        *x,x1,x2,x3,x4,s1,s2,s3,s4;
439   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
440   const PetscScalar  *b;
441   PetscErrorCode     ierr;
442   PetscInt           m = a->mbs,i,i2,nz,idx;
443   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
444 
445   PetscFunctionBegin;
446   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
447   its = its*lits;
448   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
449   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
450   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
451   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
452   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
453 
454   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
455 
456   diag  = a->diag;
457   idiag = a->idiag;
458   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
459   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
460 
461   if (flag & SOR_ZERO_INITIAL_GUESS) {
462     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
463       x[0] = b[0]*idiag[0] + b[1]*idiag[4] + b[2]*idiag[8]  + b[3]*idiag[12];
464       x[1] = b[0]*idiag[1] + b[1]*idiag[5] + b[2]*idiag[9]  + b[3]*idiag[13];
465       x[2] = b[0]*idiag[2] + b[1]*idiag[6] + b[2]*idiag[10] + b[3]*idiag[14];
466       x[3] = b[0]*idiag[3] + b[1]*idiag[7] + b[2]*idiag[11] + b[3]*idiag[15];
467       i2     = 4;
468       idiag += 16;
469       for (i=1; i<m; i++) {
470         v     = aa + 16*ai[i];
471         vi    = aj + ai[i];
472         nz    = diag[i] - ai[i];
473         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3];
474         while (nz--) {
475           idx  = 4*(*vi++);
476           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
477           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
478           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
479           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
480           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
481           v   += 16;
482         }
483         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
484         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
485         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
486         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
487         idiag   += 16;
488         i2      += 4;
489       }
490       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
491       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
492     }
493     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
494         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
495       i2    = 0;
496       mdiag = a->idiag+16*a->mbs;
497       for (i=0; i<m; i++) {
498         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
499         x[i2]   = mdiag[0]*x1 + mdiag[4]*x2 + mdiag[8]*x3  + mdiag[12]*x4;
500         x[i2+1] = mdiag[1]*x1 + mdiag[5]*x2 + mdiag[9]*x3  + mdiag[13]*x4;
501         x[i2+2] = mdiag[2]*x1 + mdiag[6]*x2 + mdiag[10]*x3 + mdiag[14]*x4;
502         x[i2+3] = mdiag[3]*x1 + mdiag[7]*x2 + mdiag[11]*x3 + mdiag[15]*x4;
503         mdiag  += 16;
504         i2     += 4;
505       }
506       ierr = PetscLogFlops(28.0*m);CHKERRQ(ierr);
507     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
508       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
509     }
510     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
511       idiag   = a->idiag+16*a->mbs - 16;
512       i2      = 4*m - 4;
513       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3];
514       x[i2]   = idiag[0]*x1 + idiag[4]*x2 + idiag[8]*x3  + idiag[12]*x4;
515       x[i2+1] = idiag[1]*x1 + idiag[5]*x2 + idiag[9]*x3  + idiag[13]*x4;
516       x[i2+2] = idiag[2]*x1 + idiag[6]*x2 + idiag[10]*x3 + idiag[14]*x4;
517       x[i2+3] = idiag[3]*x1 + idiag[7]*x2 + idiag[11]*x3 + idiag[15]*x4;
518       idiag -= 16;
519       i2    -= 4;
520       for (i=m-2; i>=0; i--) {
521         v     = aa + 16*(diag[i]+1);
522         vi    = aj + diag[i] + 1;
523         nz    = ai[i+1] - diag[i] - 1;
524         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3];
525         while (nz--) {
526           idx  = 4*(*vi++);
527           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx];
528           s1  -= v[0]*x1 + v[4]*x2 + v[8]*x3  + v[12]*x4;
529           s2  -= v[1]*x1 + v[5]*x2 + v[9]*x3  + v[13]*x4;
530           s3  -= v[2]*x1 + v[6]*x2 + v[10]*x3 + v[14]*x4;
531           s4  -= v[3]*x1 + v[7]*x2 + v[11]*x3 + v[15]*x4;
532           v   += 16;
533         }
534         x[i2]   = idiag[0]*s1 + idiag[4]*s2 + idiag[8]*s3  + idiag[12]*s4;
535         x[i2+1] = idiag[1]*s1 + idiag[5]*s2 + idiag[9]*s3  + idiag[13]*s4;
536         x[i2+2] = idiag[2]*s1 + idiag[6]*s2 + idiag[10]*s3 + idiag[14]*s4;
537         x[i2+3] = idiag[3]*s1 + idiag[7]*s2 + idiag[11]*s3 + idiag[15]*s4;
538         idiag   -= 16;
539         i2      -= 4;
540       }
541       ierr = PetscLogFlops(16.0*(a->nz));CHKERRQ(ierr);
542     }
543   } else {
544     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
545   }
546   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
547   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatSOR_SeqBAIJ_5"
553 PetscErrorCode MatSOR_SeqBAIJ_5(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
554 {
555   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
556   PetscScalar        *x,x1,x2,x3,x4,x5,s1,s2,s3,s4,s5;
557   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
558   const PetscScalar  *b;
559   PetscErrorCode     ierr;
560   PetscInt           m = a->mbs,i,i2,nz,idx;
561   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
562 
563   PetscFunctionBegin;
564   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
565   its = its*lits;
566   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
567   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
568   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
569   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
570   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
571 
572   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
573 
574   diag  = a->diag;
575   idiag = a->idiag;
576   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
577   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
578 
579   if (flag & SOR_ZERO_INITIAL_GUESS) {
580     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
581       x[0] = b[0]*idiag[0] + b[1]*idiag[5] + b[2]*idiag[10] + b[3]*idiag[15] + b[4]*idiag[20];
582       x[1] = b[0]*idiag[1] + b[1]*idiag[6] + b[2]*idiag[11] + b[3]*idiag[16] + b[4]*idiag[21];
583       x[2] = b[0]*idiag[2] + b[1]*idiag[7] + b[2]*idiag[12] + b[3]*idiag[17] + b[4]*idiag[22];
584       x[3] = b[0]*idiag[3] + b[1]*idiag[8] + b[2]*idiag[13] + b[3]*idiag[18] + b[4]*idiag[23];
585       x[4] = b[0]*idiag[4] + b[1]*idiag[9] + b[2]*idiag[14] + b[3]*idiag[19] + b[4]*idiag[24];
586       i2     = 5;
587       idiag += 25;
588       for (i=1; i<m; i++) {
589         v     = aa + 25*ai[i];
590         vi    = aj + ai[i];
591         nz    = diag[i] - ai[i];
592         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4];
593         while (nz--) {
594           idx  = 5*(*vi++);
595           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
596           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
597           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
598           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
599           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
600           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
601           v   += 25;
602         }
603         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
604         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
605         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
606         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
607         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
608         idiag   += 25;
609         i2      += 5;
610       }
611       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
612       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
613     }
614     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
615         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
616       i2    = 0;
617       mdiag = a->idiag+25*a->mbs;
618       for (i=0; i<m; i++) {
619         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
620         x[i2]   = mdiag[0]*x1 + mdiag[5]*x2 + mdiag[10]*x3 + mdiag[15]*x4 + mdiag[20]*x5;
621         x[i2+1] = mdiag[1]*x1 + mdiag[6]*x2 + mdiag[11]*x3 + mdiag[16]*x4 + mdiag[21]*x5;
622         x[i2+2] = mdiag[2]*x1 + mdiag[7]*x2 + mdiag[12]*x3 + mdiag[17]*x4 + mdiag[22]*x5;
623         x[i2+3] = mdiag[3]*x1 + mdiag[8]*x2 + mdiag[13]*x3 + mdiag[18]*x4 + mdiag[23]*x5;
624         x[i2+4] = mdiag[4]*x1 + mdiag[9]*x2 + mdiag[14]*x3 + mdiag[19]*x4 + mdiag[24]*x5;
625         mdiag  += 25;
626         i2     += 5;
627       }
628       ierr = PetscLogFlops(45.0*m);CHKERRQ(ierr);
629     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
630       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
631     }
632     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
633       idiag   = a->idiag+25*a->mbs - 25;
634       i2      = 5*m - 5;
635       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4];
636       x[i2]   = idiag[0]*x1 + idiag[5]*x2 + idiag[10]*x3 + idiag[15]*x4 + idiag[20]*x5;
637       x[i2+1] = idiag[1]*x1 + idiag[6]*x2 + idiag[11]*x3 + idiag[16]*x4 + idiag[21]*x5;
638       x[i2+2] = idiag[2]*x1 + idiag[7]*x2 + idiag[12]*x3 + idiag[17]*x4 + idiag[22]*x5;
639       x[i2+3] = idiag[3]*x1 + idiag[8]*x2 + idiag[13]*x3 + idiag[18]*x4 + idiag[23]*x5;
640       x[i2+4] = idiag[4]*x1 + idiag[9]*x2 + idiag[14]*x3 + idiag[19]*x4 + idiag[24]*x5;
641       idiag -= 25;
642       i2    -= 5;
643       for (i=m-2; i>=0; i--) {
644         v     = aa + 25*(diag[i]+1);
645         vi    = aj + diag[i] + 1;
646         nz    = ai[i+1] - diag[i] - 1;
647         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4];
648         while (nz--) {
649           idx  = 5*(*vi++);
650           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx];
651           s1  -= v[0]*x1 + v[5]*x2 + v[10]*x3 + v[15]*x4 + v[20]*x5;
652           s2  -= v[1]*x1 + v[6]*x2 + v[11]*x3 + v[16]*x4 + v[21]*x5;
653           s3  -= v[2]*x1 + v[7]*x2 + v[12]*x3 + v[17]*x4 + v[22]*x5;
654           s4  -= v[3]*x1 + v[8]*x2 + v[13]*x3 + v[18]*x4 + v[23]*x5;
655           s5  -= v[4]*x1 + v[9]*x2 + v[14]*x3 + v[19]*x4 + v[24]*x5;
656           v   += 25;
657         }
658         x[i2]   = idiag[0]*s1 + idiag[5]*s2 + idiag[10]*s3 + idiag[15]*s4 + idiag[20]*s5;
659         x[i2+1] = idiag[1]*s1 + idiag[6]*s2 + idiag[11]*s3 + idiag[16]*s4 + idiag[21]*s5;
660         x[i2+2] = idiag[2]*s1 + idiag[7]*s2 + idiag[12]*s3 + idiag[17]*s4 + idiag[22]*s5;
661         x[i2+3] = idiag[3]*s1 + idiag[8]*s2 + idiag[13]*s3 + idiag[18]*s4 + idiag[23]*s5;
662         x[i2+4] = idiag[4]*s1 + idiag[9]*s2 + idiag[14]*s3 + idiag[19]*s4 + idiag[24]*s5;
663         idiag   -= 25;
664         i2      -= 5;
665       }
666       ierr = PetscLogFlops(25.0*(a->nz));CHKERRQ(ierr);
667     }
668   } else {
669     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
670   }
671   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
672   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
673   PetscFunctionReturn(0);
674 }
675 
676 #undef __FUNCT__
677 #define __FUNCT__ "MatSOR_SeqBAIJ_6"
678 PetscErrorCode MatSOR_SeqBAIJ_6(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
679 {
680   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
681   PetscScalar        *x,x1,x2,x3,x4,x5,x6,s1,s2,s3,s4,s5,s6;
682   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
683   const PetscScalar  *b;
684   PetscErrorCode     ierr;
685   PetscInt           m = a->mbs,i,i2,nz,idx;
686   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
687 
688   PetscFunctionBegin;
689   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
690   its = its*lits;
691   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
692   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
693   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
694   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
695   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
696 
697   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
698 
699   diag  = a->diag;
700   idiag = a->idiag;
701   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
702   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
703 
704   if (flag & SOR_ZERO_INITIAL_GUESS) {
705     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
706       x[0] = b[0]*idiag[0] + b[1]*idiag[6]  + b[2]*idiag[12] + b[3]*idiag[18] + b[4]*idiag[24] + b[5]*idiag[30];
707       x[1] = b[0]*idiag[1] + b[1]*idiag[7]  + b[2]*idiag[13] + b[3]*idiag[19] + b[4]*idiag[25] + b[5]*idiag[31];
708       x[2] = b[0]*idiag[2] + b[1]*idiag[8]  + b[2]*idiag[14] + b[3]*idiag[20] + b[4]*idiag[26] + b[5]*idiag[32];
709       x[3] = b[0]*idiag[3] + b[1]*idiag[9]  + b[2]*idiag[15] + b[3]*idiag[21] + b[4]*idiag[27] + b[5]*idiag[33];
710       x[4] = b[0]*idiag[4] + b[1]*idiag[10] + b[2]*idiag[16] + b[3]*idiag[22] + b[4]*idiag[28] + b[5]*idiag[34];
711       x[5] = b[0]*idiag[5] + b[1]*idiag[11] + b[2]*idiag[17] + b[3]*idiag[23] + b[4]*idiag[29] + b[5]*idiag[35];
712       i2     = 6;
713       idiag += 36;
714       for (i=1; i<m; i++) {
715         v     = aa + 36*ai[i];
716         vi    = aj + ai[i];
717         nz    = diag[i] - ai[i];
718         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5];
719         while (nz--) {
720           idx  = 6*(*vi++);
721           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
722           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
723           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
724           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
725           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
726           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
727           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
728           v   += 36;
729         }
730         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
731         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
732         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
733         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
734         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
735         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
736         idiag   += 36;
737         i2      += 6;
738       }
739       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
740       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
741     }
742     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
743         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
744       i2    = 0;
745       mdiag = a->idiag+36*a->mbs;
746       for (i=0; i<m; i++) {
747         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
748         x[i2]   = mdiag[0]*x1 + mdiag[6]*x2  + mdiag[12]*x3 + mdiag[18]*x4 + mdiag[24]*x5 + mdiag[30]*x6;
749         x[i2+1] = mdiag[1]*x1 + mdiag[7]*x2  + mdiag[13]*x3 + mdiag[19]*x4 + mdiag[25]*x5 + mdiag[31]*x6;
750         x[i2+2] = mdiag[2]*x1 + mdiag[8]*x2  + mdiag[14]*x3 + mdiag[20]*x4 + mdiag[26]*x5 + mdiag[32]*x6;
751         x[i2+3] = mdiag[3]*x1 + mdiag[9]*x2  + mdiag[15]*x3 + mdiag[21]*x4 + mdiag[27]*x5 + mdiag[33]*x6;
752         x[i2+4] = mdiag[4]*x1 + mdiag[10]*x2 + mdiag[16]*x3 + mdiag[22]*x4 + mdiag[28]*x5 + mdiag[34]*x6;
753         x[i2+5] = mdiag[5]*x1 + mdiag[11]*x2 + mdiag[17]*x3 + mdiag[23]*x4 + mdiag[29]*x5 + mdiag[35]*x6;
754         mdiag  += 36;
755         i2     += 6;
756       }
757       ierr = PetscLogFlops(60.0*m);CHKERRQ(ierr);
758     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
759       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
760     }
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
762       idiag   = a->idiag+36*a->mbs - 36;
763       i2      = 6*m - 6;
764       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5];
765       x[i2]   = idiag[0]*x1 + idiag[6]*x2  + idiag[12]*x3 + idiag[18]*x4 + idiag[24]*x5 + idiag[30]*x6;
766       x[i2+1] = idiag[1]*x1 + idiag[7]*x2  + idiag[13]*x3 + idiag[19]*x4 + idiag[25]*x5 + idiag[31]*x6;
767       x[i2+2] = idiag[2]*x1 + idiag[8]*x2  + idiag[14]*x3 + idiag[20]*x4 + idiag[26]*x5 + idiag[32]*x6;
768       x[i2+3] = idiag[3]*x1 + idiag[9]*x2  + idiag[15]*x3 + idiag[21]*x4 + idiag[27]*x5 + idiag[33]*x6;
769       x[i2+4] = idiag[4]*x1 + idiag[10]*x2 + idiag[16]*x3 + idiag[22]*x4 + idiag[28]*x5 + idiag[34]*x6;
770       x[i2+5] = idiag[5]*x1 + idiag[11]*x2 + idiag[17]*x3 + idiag[23]*x4 + idiag[29]*x5 + idiag[35]*x6;
771       idiag -= 36;
772       i2    -= 6;
773       for (i=m-2; i>=0; i--) {
774         v     = aa + 36*(diag[i]+1);
775         vi    = aj + diag[i] + 1;
776         nz    = ai[i+1] - diag[i] - 1;
777         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5];
778         while (nz--) {
779           idx  = 6*(*vi++);
780           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx];
781           s1  -= v[0]*x1 + v[6]*x2  + v[12]*x3 + v[18]*x4 + v[24]*x5 + v[30]*x6;
782           s2  -= v[1]*x1 + v[7]*x2  + v[13]*x3 + v[19]*x4 + v[25]*x5 + v[31]*x6;
783           s3  -= v[2]*x1 + v[8]*x2  + v[14]*x3 + v[20]*x4 + v[26]*x5 + v[32]*x6;
784           s4  -= v[3]*x1 + v[9]*x2  + v[15]*x3 + v[21]*x4 + v[27]*x5 + v[33]*x6;
785           s5  -= v[4]*x1 + v[10]*x2 + v[16]*x3 + v[22]*x4 + v[28]*x5 + v[34]*x6;
786           s6  -= v[5]*x1 + v[11]*x2 + v[17]*x3 + v[23]*x4 + v[29]*x5 + v[35]*x6;
787           v   += 36;
788         }
789         x[i2]   = idiag[0]*s1 + idiag[6]*s2  + idiag[12]*s3 + idiag[18]*s4 + idiag[24]*s5 + idiag[30]*s6;
790         x[i2+1] = idiag[1]*s1 + idiag[7]*s2  + idiag[13]*s3 + idiag[19]*s4 + idiag[25]*s5 + idiag[31]*s6;
791         x[i2+2] = idiag[2]*s1 + idiag[8]*s2  + idiag[14]*s3 + idiag[20]*s4 + idiag[26]*s5 + idiag[32]*s6;
792         x[i2+3] = idiag[3]*s1 + idiag[9]*s2  + idiag[15]*s3 + idiag[21]*s4 + idiag[27]*s5 + idiag[33]*s6;
793         x[i2+4] = idiag[4]*s1 + idiag[10]*s2 + idiag[16]*s3 + idiag[22]*s4 + idiag[28]*s5 + idiag[34]*s6;
794         x[i2+5] = idiag[5]*s1 + idiag[11]*s2 + idiag[17]*s3 + idiag[23]*s4 + idiag[29]*s5 + idiag[35]*s6;
795         idiag   -= 36;
796         i2      -= 6;
797       }
798       ierr = PetscLogFlops(36.0*(a->nz));CHKERRQ(ierr);
799     }
800   } else {
801     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
802   }
803   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
804   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatSOR_SeqBAIJ_7"
810 PetscErrorCode MatSOR_SeqBAIJ_7(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
811 {
812   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
813   PetscScalar        *x,x1,x2,x3,x4,x5,x6,x7,s1,s2,s3,s4,s5,s6,s7;
814   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
815   const PetscScalar  *b;
816   PetscErrorCode     ierr;
817   PetscInt           m = a->mbs,i,i2,nz,idx;
818   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
819 
820   PetscFunctionBegin;
821   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
822   its = its*lits;
823   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
824   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
825   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
826   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
827   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
828 
829   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
830 
831   diag  = a->diag;
832   idiag = a->idiag;
833   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
834   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
835 
836   if (flag & SOR_ZERO_INITIAL_GUESS) {
837     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
838       x[0] = b[0]*idiag[0] + b[1]*idiag[7]  + b[2]*idiag[14] + b[3]*idiag[21] + b[4]*idiag[28] + b[5]*idiag[35] + b[6]*idiag[42];
839       x[1] = b[0]*idiag[1] + b[1]*idiag[8]  + b[2]*idiag[15] + b[3]*idiag[22] + b[4]*idiag[29] + b[5]*idiag[36] + b[6]*idiag[43];
840       x[2] = b[0]*idiag[2] + b[1]*idiag[9]  + b[2]*idiag[16] + b[3]*idiag[23] + b[4]*idiag[30] + b[5]*idiag[37] + b[6]*idiag[44];
841       x[3] = b[0]*idiag[3] + b[1]*idiag[10] + b[2]*idiag[17] + b[3]*idiag[24] + b[4]*idiag[31] + b[5]*idiag[38] + b[6]*idiag[45];
842       x[4] = b[0]*idiag[4] + b[1]*idiag[11] + b[2]*idiag[18] + b[3]*idiag[25] + b[4]*idiag[32] + b[5]*idiag[39] + b[6]*idiag[46];
843       x[5] = b[0]*idiag[5] + b[1]*idiag[12] + b[2]*idiag[19] + b[3]*idiag[26] + b[4]*idiag[33] + b[5]*idiag[40] + b[6]*idiag[47];
844       x[6] = b[0]*idiag[6] + b[1]*idiag[13] + b[2]*idiag[20] + b[3]*idiag[27] + b[4]*idiag[34] + b[5]*idiag[41] + b[6]*idiag[48];
845       i2     = 7;
846       idiag += 49;
847       for (i=1; i<m; i++) {
848         v     = aa + 49*ai[i];
849         vi    = aj + ai[i];
850         nz    = diag[i] - ai[i];
851         s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2]; s4 = b[i2+3]; s5 = b[i2+4]; s6 = b[i2+5]; s7 = b[i2+6];
852         while (nz--) {
853           idx  = 7*(*vi++);
854           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
855           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
856           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
857           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
858           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
859           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
860           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
861           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
862           v   += 49;
863         }
864         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
865         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
866         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
867         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
868         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
869         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
870         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
871         idiag   += 49;
872         i2      += 7;
873       }
874       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
875       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
876     }
877     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
878         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
879       i2    = 0;
880       mdiag = a->idiag+49*a->mbs;
881       for (i=0; i<m; i++) {
882         x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
883         x[i2]   = mdiag[0]*x1 + mdiag[7]*x2  + mdiag[14]*x3 + mdiag[21]*x4 + mdiag[28]*x5 + mdiag[35]*x6 + mdiag[42]*x7;
884         x[i2+1] = mdiag[1]*x1 + mdiag[8]*x2  + mdiag[15]*x3 + mdiag[22]*x4 + mdiag[29]*x5 + mdiag[36]*x6 + mdiag[43]*x7;
885         x[i2+2] = mdiag[2]*x1 + mdiag[9]*x2  + mdiag[16]*x3 + mdiag[23]*x4 + mdiag[30]*x5 + mdiag[37]*x6 + mdiag[44]*x7;
886         x[i2+3] = mdiag[3]*x1 + mdiag[10]*x2 + mdiag[17]*x3 + mdiag[24]*x4 + mdiag[31]*x5 + mdiag[38]*x6 + mdiag[45]*x7;
887         x[i2+4] = mdiag[4]*x1 + mdiag[11]*x2 + mdiag[18]*x3 + mdiag[25]*x4 + mdiag[32]*x5 + mdiag[39]*x6 + mdiag[46]*x7;
888         x[i2+5] = mdiag[5]*x1 + mdiag[12]*x2 + mdiag[19]*x3 + mdiag[26]*x4 + mdiag[33]*x5 + mdiag[40]*x6 + mdiag[47]*x7;
889         x[i2+6] = mdiag[6]*x1 + mdiag[13]*x2 + mdiag[20]*x3 + mdiag[27]*x4 + mdiag[34]*x5 + mdiag[41]*x6 + mdiag[48]*x7;
890         mdiag  += 49;
891         i2     += 7;
892       }
893       ierr = PetscLogFlops(93.0*m);CHKERRQ(ierr);
894     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
895       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
896     }
897     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
898       idiag   = a->idiag+49*a->mbs - 49;
899       i2      = 7*m - 7;
900       x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2]; x4 = x[i2+3]; x5 = x[i2+4]; x6 = x[i2+5]; x7 = x[i2+6];
901       x[i2]   = idiag[0]*x1 + idiag[7]*x2  + idiag[14]*x3 + idiag[21]*x4 + idiag[28]*x5 + idiag[35]*x6 + idiag[42]*x7;
902       x[i2+1] = idiag[1]*x1 + idiag[8]*x2  + idiag[15]*x3 + idiag[22]*x4 + idiag[29]*x5 + idiag[36]*x6 + idiag[43]*x7;
903       x[i2+2] = idiag[2]*x1 + idiag[9]*x2  + idiag[16]*x3 + idiag[23]*x4 + idiag[30]*x5 + idiag[37]*x6 + idiag[44]*x7;
904       x[i2+3] = idiag[3]*x1 + idiag[10]*x2 + idiag[17]*x3 + idiag[24]*x4 + idiag[31]*x5 + idiag[38]*x6 + idiag[45]*x7;
905       x[i2+4] = idiag[4]*x1 + idiag[11]*x2 + idiag[18]*x3 + idiag[25]*x4 + idiag[32]*x5 + idiag[39]*x6 + idiag[46]*x7;
906       x[i2+5] = idiag[5]*x1 + idiag[12]*x2 + idiag[19]*x3 + idiag[26]*x4 + idiag[33]*x5 + idiag[40]*x6 + idiag[47]*x7;
907       x[i2+6] = idiag[6]*x1 + idiag[13]*x2 + idiag[20]*x3 + idiag[27]*x4 + idiag[34]*x5 + idiag[41]*x6 + idiag[48]*x7;
908       idiag -= 49;
909       i2    -= 7;
910       for (i=m-2; i>=0; i--) {
911         v     = aa + 49*(diag[i]+1);
912         vi    = aj + diag[i] + 1;
913         nz    = ai[i+1] - diag[i] - 1;
914         s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2]; s4 = x[i2+3]; s5 = x[i2+4]; s6 = x[i2+5]; s7 = x[i2+6];
915         while (nz--) {
916           idx  = 7*(*vi++);
917           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx]; x4 = x[3+idx]; x5 = x[4+idx]; x6 = x[5+idx]; x7 = x[6+idx];
918           s1  -= v[0]*x1 + v[7]*x2  + v[14]*x3 + v[21]*x4 + v[28]*x5 + v[35]*x6 + v[42]*x7;
919           s2  -= v[1]*x1 + v[8]*x2  + v[15]*x3 + v[22]*x4 + v[29]*x5 + v[36]*x6 + v[43]*x7;
920           s3  -= v[2]*x1 + v[9]*x2  + v[16]*x3 + v[23]*x4 + v[30]*x5 + v[37]*x6 + v[44]*x7;
921           s4  -= v[3]*x1 + v[10]*x2 + v[17]*x3 + v[24]*x4 + v[31]*x5 + v[38]*x6 + v[45]*x7;
922           s5  -= v[4]*x1 + v[11]*x2 + v[18]*x3 + v[25]*x4 + v[32]*x5 + v[39]*x6 + v[46]*x7;
923           s6  -= v[5]*x1 + v[12]*x2 + v[19]*x3 + v[26]*x4 + v[33]*x5 + v[40]*x6 + v[47]*x7;
924           s7  -= v[6]*x1 + v[13]*x2 + v[20]*x3 + v[27]*x4 + v[34]*x5 + v[41]*x6 + v[48]*x7;
925           v   += 49;
926         }
927         x[i2]   = idiag[0]*s1 + idiag[7]*s2  + idiag[14]*s3 + idiag[21]*s4 + idiag[28]*s5 + idiag[35]*s6 + idiag[42]*s7;
928         x[i2+1] = idiag[1]*s1 + idiag[8]*s2  + idiag[15]*s3 + idiag[22]*s4 + idiag[29]*s5 + idiag[36]*s6 + idiag[43]*s7;
929         x[i2+2] = idiag[2]*s1 + idiag[9]*s2  + idiag[16]*s3 + idiag[23]*s4 + idiag[30]*s5 + idiag[37]*s6 + idiag[44]*s7;
930         x[i2+3] = idiag[3]*s1 + idiag[10]*s2 + idiag[17]*s3 + idiag[24]*s4 + idiag[31]*s5 + idiag[38]*s6 + idiag[45]*s7;
931         x[i2+4] = idiag[4]*s1 + idiag[11]*s2 + idiag[18]*s3 + idiag[25]*s4 + idiag[32]*s5 + idiag[39]*s6 + idiag[46]*s7;
932         x[i2+5] = idiag[5]*s1 + idiag[12]*s2 + idiag[19]*s3 + idiag[26]*s4 + idiag[33]*s5 + idiag[40]*s6 + idiag[47]*s7;
933         x[i2+6] = idiag[6]*s1 + idiag[13]*s2 + idiag[20]*s3 + idiag[27]*s4 + idiag[34]*s5 + idiag[41]*s6 + idiag[48]*s7;
934         idiag   -= 49;
935         i2      -= 7;
936       }
937       ierr = PetscLogFlops(49.0*(a->nz));CHKERRQ(ierr);
938     }
939   } else {
940     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
941   }
942   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
943   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatSOR_SeqBAIJ_N"
949 PetscErrorCode MatSOR_SeqBAIJ_N(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
950 {
951   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data;
952   PetscScalar        *x,*work,*w,*workt;
953   const MatScalar    *v,*aa = a->a, *idiag,*mdiag;
954   const PetscScalar  *b;
955   PetscErrorCode     ierr;
956   PetscInt           m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j;
957   const PetscInt     *diag,*ai = a->i,*aj = a->j,*vi;
958 
959   PetscFunctionBegin;
960   its = its*lits;
961   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
962   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
963   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
964   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
965   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
966   if (its > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support yet for multiple point block SOR iterations");
967 
968   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,PETSC_NULL);CHKERRQ(ierr);}
969 
970   diag  = a->diag;
971   idiag = a->idiag;
972   if (!a->mult_work) {
973     k    = PetscMax(A->rmap->n,A->cmap->n);
974     ierr = PetscMalloc((k+1)*sizeof(PetscScalar),&a->mult_work);CHKERRQ(ierr);
975   }
976   work = a->mult_work;
977   if (!a->sor_work) {
978     ierr = PetscMalloc(bs*sizeof(PetscScalar),&a->sor_work);CHKERRQ(ierr);
979   }
980   w = a->sor_work;
981 
982   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
984 
985   if (flag & SOR_ZERO_INITIAL_GUESS) {
986     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
987       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
988       /*x[0] = b[0]*idiag[0] + b[1]*idiag[3] + b[2]*idiag[6];
989       x[1] = b[0]*idiag[1] + b[1]*idiag[4] + b[2]*idiag[7];
990       x[2] = b[0]*idiag[2] + b[1]*idiag[5] + b[2]*idiag[8];*/
991       i2     = bs;
992       idiag += bs2;
993       for (i=1; i<m; i++) {
994         v     = aa + bs2*ai[i];
995         vi    = aj + ai[i];
996         nz    = diag[i] - ai[i];
997 
998         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
999         /* copy all rows of x that are needed into contiguous space */
1000         workt = work;
1001         for (j=0; j<nz; j++) {
1002           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1003           workt += bs;
1004         }
1005         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1006        /*s1    = b[i2]; s2 = b[i2+1]; s3 = b[i2+2];
1007         while (nz--) {
1008           idx  = N*(*vi++);
1009           x1   = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
1010           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1011           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1012           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1013           v   += N2;
1014           } */
1015 
1016         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1017         /*  x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1018         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1019         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3;*/
1020 
1021         idiag   += bs2;
1022         i2      += bs;
1023       }
1024       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
1025       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1026     }
1027     if ((flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) &&
1028         (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP)) {
1029       i2    = 0;
1030       mdiag = a->idiag+bs2*a->mbs;
1031       ierr  = PetscMemcpy(work,x,m*bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032       for (i=0; i<m; i++) {
1033         PetscKernel_w_gets_Ar_times_v(bs,bs,work+i2,mdiag,x+i2);
1034         /* x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1035         x[i2]   = mdiag[0]*x1 + mdiag[3]*x2 + mdiag[6]*x3;
1036         x[i2+1] = mdiag[1]*x1 + mdiag[4]*x2 + mdiag[7]*x3;
1037         x[i2+2] = mdiag[2]*x1 + mdiag[5]*x2 + mdiag[8]*x3; */
1038 
1039         mdiag  += bs2;
1040         i2     += bs;
1041       }
1042       ierr = PetscLogFlops(2.0*bs*(bs-1)*m);CHKERRQ(ierr);
1043     } else if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1044       ierr = PetscMemcpy(x,b,A->rmap->N*sizeof(PetscScalar));CHKERRQ(ierr);
1045     }
1046     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1047       idiag   = a->idiag+bs2*a->mbs - bs2;
1048       i2      = bs*m - bs;
1049       ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1050       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1051       /*x1      = x[i2]; x2 = x[i2+1]; x3 = x[i2+2];
1052       x[i2]   = idiag[0]*x1 + idiag[3]*x2 + idiag[6]*x3;
1053       x[i2+1] = idiag[1]*x1 + idiag[4]*x2 + idiag[7]*x3;
1054       x[i2+2] = idiag[2]*x1 + idiag[5]*x2 + idiag[8]*x3;*/
1055       idiag -= bs2;
1056       i2    -= bs;
1057       for (i=m-2; i>=0; i--) {
1058         v     = aa + bs2*(diag[i]+1);
1059         vi    = aj + diag[i] + 1;
1060         nz    = ai[i+1] - diag[i] - 1;
1061 
1062         ierr = PetscMemcpy(w,x+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063         /* copy all rows of x that are needed into contiguous space */
1064         workt = work;
1065         for (j=0; j<nz; j++) {
1066           ierr = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1067           workt += bs;
1068         }
1069         PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
1070         /* s1    = x[i2]; s2 = x[i2+1]; s3 = x[i2+2];
1071         while (nz--) {
1072           idx  = N*(*vi++);
1073           x1   = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
1074           s1  -= v[0]*x1 + v[3]*x2 + v[6]*x3;
1075           s2  -= v[1]*x1 + v[4]*x2 + v[7]*x3;
1076           s3  -= v[2]*x1 + v[5]*x2 + v[8]*x3;
1077           v   += N2;
1078           } */
1079 
1080         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
1081         /*x[i2]   = idiag[0]*s1 + idiag[3]*s2 + idiag[6]*s3;
1082         x[i2+1] = idiag[1]*s1 + idiag[4]*s2 + idiag[7]*s3;
1083         x[i2+2] = idiag[2]*s1 + idiag[5]*s2 + idiag[8]*s3; */
1084         idiag   -= bs2;
1085         i2      -= bs;
1086       }
1087       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1088     }
1089   } else {
1090     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only supports point block SOR with zero initial guess");
1091   }
1092   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1093   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 /*
1098     Special version for direct calls from Fortran (Used in PETSc-fun3d)
1099 */
1100 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1101 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
1102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1103 #define matsetvaluesblocked4_ matsetvaluesblocked4
1104 #endif
1105 
1106 EXTERN_C_BEGIN
1107 #undef __FUNCT__
1108 #define __FUNCT__ "matsetvaluesblocked4_"
1109 void  matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
1110 {
1111   Mat               A = *AA;
1112   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1113   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
1114   PetscInt          *ai=a->i,*ailen=a->ilen;
1115   PetscInt          *aj=a->j,stepval,lastcol = -1;
1116   const PetscScalar *value = v;
1117   MatScalar         *ap,*aa = a->a,*bap;
1118 
1119   PetscFunctionBegin;
1120   if (A->rmap->bs != 4) SETERRABORT(((PetscObject)A)->comm,PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
1121   stepval = (n-1)*4;
1122   for (k=0; k<m; k++) { /* loop over added rows */
1123     row  = im[k];
1124     rp   = aj + ai[row];
1125     ap   = aa + 16*ai[row];
1126     nrow = ailen[row];
1127     low  = 0;
1128     high = nrow;
1129     for (l=0; l<n; l++) { /* loop over added columns */
1130       col = in[l];
1131       if (col <= lastcol)  low = 0;
1132       else                high = nrow;
1133       lastcol = col;
1134       value = v + k*(stepval+4 + l)*4;
1135       while (high-low > 7) {
1136         t = (low+high)/2;
1137         if (rp[t] > col) high = t;
1138         else             low  = t;
1139       }
1140       for (i=low; i<high; i++) {
1141         if (rp[i] > col) break;
1142         if (rp[i] == col) {
1143           bap  = ap +  16*i;
1144           for (ii=0; ii<4; ii++,value+=stepval) {
1145             for (jj=ii; jj<16; jj+=4) {
1146               bap[jj] += *value++;
1147             }
1148           }
1149           goto noinsert2;
1150         }
1151       }
1152       N = nrow++ - 1;
1153       high++; /* added new column index thus must search to one higher than before */
1154       /* shift up all the later entries in this row */
1155       for (ii=N; ii>=i; ii--) {
1156         rp[ii+1] = rp[ii];
1157         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1158       }
1159       if (N >= i) {
1160         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1161       }
1162       rp[i] = col;
1163       bap   = ap +  16*i;
1164       for (ii=0; ii<4; ii++,value+=stepval) {
1165         for (jj=ii; jj<16; jj+=4) {
1166           bap[jj] = *value++;
1167         }
1168       }
1169       noinsert2:;
1170       low = i;
1171     }
1172     ailen[row] = nrow;
1173   }
1174   PetscFunctionReturnVoid();
1175 }
1176 EXTERN_C_END
1177 
1178 #if defined(PETSC_HAVE_FORTRAN_CAPS)
1179 #define matsetvalues4_ MATSETVALUES4
1180 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
1181 #define matsetvalues4_ matsetvalues4
1182 #endif
1183 
1184 EXTERN_C_BEGIN
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatSetValues4_"
1187 void  matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
1188 {
1189   Mat         A = *AA;
1190   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1191   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
1192   PetscInt    *ai=a->i,*ailen=a->ilen;
1193   PetscInt    *aj=a->j,brow,bcol;
1194   PetscInt    ridx,cidx,lastcol = -1;
1195   MatScalar   *ap,value,*aa=a->a,*bap;
1196 
1197   PetscFunctionBegin;
1198   for (k=0; k<m; k++) { /* loop over added rows */
1199     row  = im[k]; brow = row/4;
1200     rp   = aj + ai[brow];
1201     ap   = aa + 16*ai[brow];
1202     nrow = ailen[brow];
1203     low  = 0;
1204     high = nrow;
1205     for (l=0; l<n; l++) { /* loop over added columns */
1206       col = in[l]; bcol = col/4;
1207       ridx = row % 4; cidx = col % 4;
1208       value = v[l + k*n];
1209       if (col <= lastcol)  low = 0;
1210       else                high = nrow;
1211       lastcol = col;
1212       while (high-low > 7) {
1213         t = (low+high)/2;
1214         if (rp[t] > bcol) high = t;
1215         else              low  = t;
1216       }
1217       for (i=low; i<high; i++) {
1218         if (rp[i] > bcol) break;
1219         if (rp[i] == bcol) {
1220           bap  = ap +  16*i + 4*cidx + ridx;
1221           *bap += value;
1222           goto noinsert1;
1223         }
1224       }
1225       N = nrow++ - 1;
1226       high++; /* added new column thus must search to one higher than before */
1227       /* shift up all the later entries in this row */
1228       for (ii=N; ii>=i; ii--) {
1229         rp[ii+1] = rp[ii];
1230         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1231       }
1232       if (N>=i) {
1233         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1234       }
1235       rp[i]                    = bcol;
1236       ap[16*i + 4*cidx + ridx] = value;
1237       noinsert1:;
1238       low = i;
1239     }
1240     ailen[brow] = nrow;
1241   }
1242   PetscFunctionReturnVoid();
1243 }
1244 EXTERN_C_END
1245 
1246 /*
1247      Checks for missing diagonals
1248 */
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1251 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1252 {
1253   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1254   PetscErrorCode ierr;
1255   PetscInt       *diag,*jj = a->j,i;
1256 
1257   PetscFunctionBegin;
1258   ierr = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1259   *missing = PETSC_FALSE;
1260   if (A->rmap->n > 0 && !jj) {
1261     *missing  = PETSC_TRUE;
1262     if (d) *d = 0;
1263     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1264   } else {
1265     diag     = a->diag;
1266     for (i=0; i<a->mbs; i++) {
1267       if (jj[diag[i]] != i) {
1268         *missing  = PETSC_TRUE;
1269         if (d) *d = i;
1270         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1271         break;
1272       }
1273     }
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1280 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1281 {
1282   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1283   PetscErrorCode ierr;
1284   PetscInt       i,j,m = a->mbs;
1285 
1286   PetscFunctionBegin;
1287   if (!a->diag) {
1288     ierr = PetscMalloc(m*sizeof(PetscInt),&a->diag);CHKERRQ(ierr);
1289     ierr = PetscLogObjectMemory(A,m*sizeof(PetscInt));CHKERRQ(ierr);
1290     a->free_diag = PETSC_TRUE;
1291   }
1292   for (i=0; i<m; i++) {
1293     a->diag[i] = a->i[i+1];
1294     for (j=a->i[i]; j<a->i[i+1]; j++) {
1295       if (a->j[j] == i) {
1296         a->diag[i] = j;
1297         break;
1298       }
1299     }
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1307 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1308 {
1309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1312   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1313 
1314   PetscFunctionBegin;
1315   *nn = n;
1316   if (!ia) PetscFunctionReturn(0);
1317   if (symmetric) {
1318     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1319     nz   = tia[n];
1320   } else {
1321     tia = a->i; tja = a->j;
1322   }
1323 
1324   if (!blockcompressed && bs > 1) {
1325     (*nn) *= bs;
1326     /* malloc & create the natural set of indices */
1327     ierr = PetscMalloc((n+1)*bs*sizeof(PetscInt),ia);CHKERRQ(ierr);
1328     if (n) {
1329       (*ia)[0] = 0;
1330       for (j=1; j<bs; j++) {
1331         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1332       }
1333     }
1334 
1335     for (i=1; i<n; i++) {
1336       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1337       for (j=1; j<bs; j++) {
1338         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1339       }
1340     }
1341     if (n) {
1342       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1343     }
1344 
1345     if (inja) {
1346       ierr = PetscMalloc(nz*bs*bs*sizeof(PetscInt),ja);CHKERRQ(ierr);
1347       cnt = 0;
1348       for (i=0; i<n; i++) {
1349         for (j=0; j<bs; j++) {
1350           for (k=tia[i]; k<tia[i+1]; k++) {
1351             for (l=0; l<bs; l++) {
1352               (*ja)[cnt++] = bs*tja[k] + l;
1353             }
1354           }
1355         }
1356       }
1357     }
1358 
1359     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1360       ierr = PetscFree(tia);CHKERRQ(ierr);
1361       ierr = PetscFree(tja);CHKERRQ(ierr);
1362     }
1363   } else if (oshift == 1) {
1364     if (symmetric) {
1365       nz = tia[A->rmap->n/bs];
1366       /*  add 1 to i and j indices */
1367       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1368       *ia = tia;
1369       if (ja) {
1370         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1371         *ja = tja;
1372       }
1373     } else {
1374       nz = a->i[A->rmap->n/bs];
1375       /* malloc space and  add 1 to i and j indices */
1376       ierr = PetscMalloc((A->rmap->n/bs+1)*sizeof(PetscInt),ia);CHKERRQ(ierr);
1377       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1378       if (ja) {
1379         ierr = PetscMalloc(nz*sizeof(PetscInt),ja);CHKERRQ(ierr);
1380         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1381       }
1382     }
1383   } else {
1384     *ia = tia;
1385     if (ja) *ja = tja;
1386   }
1387   PetscFunctionReturn(0);
1388 }
1389 
1390 #undef __FUNCT__
1391 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1392 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1393 {
1394   PetscErrorCode ierr;
1395 
1396   PetscFunctionBegin;
1397   if (!ia) PetscFunctionReturn(0);
1398   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1399     ierr = PetscFree(*ia);CHKERRQ(ierr);
1400     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1407 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1408 {
1409   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1410   PetscErrorCode ierr;
1411 
1412   PetscFunctionBegin;
1413 #if defined(PETSC_USE_LOG)
1414   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1415 #endif
1416   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1417   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1418   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1419   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1420   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1421   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1422   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1423   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1424   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1425   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1426   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1427   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1428   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1429 
1430   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1431   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1432   ierr = PetscFree(A->data);CHKERRQ(ierr);
1433 
1434   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1435   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C","",PETSC_NULL);CHKERRQ(ierr);
1436   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
1437   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
1438   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C","",PETSC_NULL);CHKERRQ(ierr);
1439   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C","",PETSC_NULL);CHKERRQ(ierr);
1440   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C","",PETSC_NULL);CHKERRQ(ierr);
1441   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
1442   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C","",PETSC_NULL);CHKERRQ(ierr);
1443   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
1444   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C","",PETSC_NULL);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1450 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool  flg)
1451 {
1452   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1453   PetscErrorCode ierr;
1454 
1455   PetscFunctionBegin;
1456   switch (op) {
1457   case MAT_ROW_ORIENTED:
1458     a->roworiented    = flg;
1459     break;
1460   case MAT_KEEP_NONZERO_PATTERN:
1461     a->keepnonzeropattern = flg;
1462     break;
1463   case MAT_NEW_NONZERO_LOCATIONS:
1464     a->nonew          = (flg ? 0 : 1);
1465     break;
1466   case MAT_NEW_NONZERO_LOCATION_ERR:
1467     a->nonew          = (flg ? -1 : 0);
1468     break;
1469   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1470     a->nonew          = (flg ? -2 : 0);
1471     break;
1472   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1473     a->nounused       = (flg ? -1 : 0);
1474     break;
1475   case MAT_CHECK_COMPRESSED_ROW:
1476     a->compressedrow.check = flg;
1477     break;
1478   case MAT_NEW_DIAGONALS:
1479   case MAT_IGNORE_OFF_PROC_ENTRIES:
1480   case MAT_USE_HASH_TABLE:
1481     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1482     break;
1483   case MAT_SPD:
1484   case MAT_SYMMETRIC:
1485   case MAT_STRUCTURALLY_SYMMETRIC:
1486   case MAT_HERMITIAN:
1487   case MAT_SYMMETRY_ETERNAL:
1488     /* These options are handled directly by MatSetOption() */
1489     break;
1490   default:
1491     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1492   }
1493   PetscFunctionReturn(0);
1494 }
1495 
1496 #undef __FUNCT__
1497 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1498 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1499 {
1500   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1501   PetscErrorCode ierr;
1502   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1503   MatScalar      *aa,*aa_i;
1504   PetscScalar    *v_i;
1505 
1506   PetscFunctionBegin;
1507   bs  = A->rmap->bs;
1508   ai  = a->i;
1509   aj  = a->j;
1510   aa  = a->a;
1511   bs2 = a->bs2;
1512 
1513   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1514 
1515   bn  = row/bs;   /* Block number */
1516   bp  = row % bs; /* Block Position */
1517   M   = ai[bn+1] - ai[bn];
1518   *nz = bs*M;
1519 
1520   if (v) {
1521     *v = 0;
1522     if (*nz) {
1523       ierr = PetscMalloc((*nz)*sizeof(PetscScalar),v);CHKERRQ(ierr);
1524       for (i=0; i<M; i++) { /* for each block in the block row */
1525         v_i  = *v + i*bs;
1526         aa_i = aa + bs2*(ai[bn] + i);
1527         for (j=bp,k=0; j<bs2; j+=bs,k++) {v_i[k] = aa_i[j];}
1528       }
1529     }
1530   }
1531 
1532   if (idx) {
1533     *idx = 0;
1534     if (*nz) {
1535       ierr = PetscMalloc((*nz)*sizeof(PetscInt),idx);CHKERRQ(ierr);
1536       for (i=0; i<M; i++) { /* for each block in the block row */
1537         idx_i = *idx + i*bs;
1538         itmp  = bs*aj[ai[bn] + i];
1539         for (j=0; j<bs; j++) {idx_i[j] = itmp++;}
1540       }
1541     }
1542   }
1543   PetscFunctionReturn(0);
1544 }
1545 
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1548 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1549 {
1550   PetscErrorCode ierr;
1551 
1552   PetscFunctionBegin;
1553   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1554   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1555   PetscFunctionReturn(0);
1556 }
1557 
1558 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1559 
1560 #undef __FUNCT__
1561 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1562 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1563 {
1564   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ *)A->data;
1565   Mat            C;
1566   PetscErrorCode ierr;
1567   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1568   PetscInt       *rows,*cols,bs2=a->bs2;
1569   MatScalar      *array;
1570 
1571   PetscFunctionBegin;
1572   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1573   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1574     ierr = PetscMalloc((1+nbs)*sizeof(PetscInt),&col);CHKERRQ(ierr);
1575     ierr = PetscMemzero(col,(1+nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1576 
1577     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1578     ierr = MatCreate(((PetscObject)A)->comm,&C);CHKERRQ(ierr);
1579     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1580     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1581     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1582     ierr = PetscFree(col);CHKERRQ(ierr);
1583   } else {
1584     C = *B;
1585   }
1586 
1587   array = a->a;
1588   ierr = PetscMalloc2(bs,PetscInt,&rows,bs,PetscInt,&cols);CHKERRQ(ierr);
1589   for (i=0; i<mbs; i++) {
1590     cols[0] = i*bs;
1591     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1592     len = ai[i+1] - ai[i];
1593     for (j=0; j<len; j++) {
1594       rows[0] = (*aj++)*bs;
1595       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1596       ierr = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1597       array += bs2;
1598     }
1599   }
1600   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1601 
1602   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1603   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1604 
1605   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1606     *B = C;
1607   } else {
1608     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1609   }
1610   PetscFunctionReturn(0);
1611 }
1612 
1613 EXTERN_C_BEGIN
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1616 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1617 {
1618   PetscErrorCode ierr;
1619   Mat            Btrans;
1620 
1621   PetscFunctionBegin;
1622   *f = PETSC_FALSE;
1623   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1624   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1625   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1626   PetscFunctionReturn(0);
1627 }
1628 EXTERN_C_END
1629 
1630 #undef __FUNCT__
1631 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1632 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1633 {
1634   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1635   PetscErrorCode ierr;
1636   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1637   int            fd;
1638   PetscScalar    *aa;
1639   FILE           *file;
1640 
1641   PetscFunctionBegin;
1642   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1643   ierr        = PetscMalloc((4+A->rmap->N)*sizeof(PetscInt),&col_lens);CHKERRQ(ierr);
1644   col_lens[0] = MAT_FILE_CLASSID;
1645 
1646   col_lens[1] = A->rmap->N;
1647   col_lens[2] = A->cmap->n;
1648   col_lens[3] = a->nz*bs2;
1649 
1650   /* store lengths of each row and write (including header) to file */
1651   count = 0;
1652   for (i=0; i<a->mbs; i++) {
1653     for (j=0; j<bs; j++) {
1654       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1655     }
1656   }
1657   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1658   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1659 
1660   /* store column indices (zero start index) */
1661   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscInt),&jj);CHKERRQ(ierr);
1662   count = 0;
1663   for (i=0; i<a->mbs; i++) {
1664     for (j=0; j<bs; j++) {
1665       for (k=a->i[i]; k<a->i[i+1]; k++) {
1666         for (l=0; l<bs; l++) {
1667           jj[count++] = bs*a->j[k] + l;
1668         }
1669       }
1670     }
1671   }
1672   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1673   ierr = PetscFree(jj);CHKERRQ(ierr);
1674 
1675   /* store nonzero values */
1676   ierr  = PetscMalloc((a->nz+1)*bs2*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
1677   count = 0;
1678   for (i=0; i<a->mbs; i++) {
1679     for (j=0; j<bs; j++) {
1680       for (k=a->i[i]; k<a->i[i+1]; k++) {
1681         for (l=0; l<bs; l++) {
1682           aa[count++] = a->a[bs2*k + l*bs + j];
1683         }
1684       }
1685     }
1686   }
1687   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1688   ierr = PetscFree(aa);CHKERRQ(ierr);
1689 
1690   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1691   if (file) {
1692     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1693   }
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #undef __FUNCT__
1698 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1699 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1700 {
1701   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1702   PetscErrorCode    ierr;
1703   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1704   PetscViewerFormat format;
1705 
1706   PetscFunctionBegin;
1707   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1708   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1709     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1710   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1711     Mat aij;
1712     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1713     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1714     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1715   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1716      PetscFunctionReturn(0);
1717   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1718     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1719     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1720     for (i=0; i<a->mbs; i++) {
1721       for (j=0; j<bs; j++) {
1722         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1723         for (k=a->i[i]; k<a->i[i+1]; k++) {
1724           for (l=0; l<bs; l++) {
1725 #if defined(PETSC_USE_COMPLEX)
1726             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1727               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %Gi) ",bs*a->j[k]+l,
1728                       PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1729             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1730               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %Gi) ",bs*a->j[k]+l,
1731                       PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1732             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1733               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1734             }
1735 #else
1736             if (a->a[bs2*k + l*bs + j] != 0.0) {
1737               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1738             }
1739 #endif
1740           }
1741         }
1742         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1743       }
1744     }
1745     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1746   } else {
1747     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1748     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer,"Matrix Object");CHKERRQ(ierr);
1749     for (i=0; i<a->mbs; i++) {
1750       for (j=0; j<bs; j++) {
1751         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1752         for (k=a->i[i]; k<a->i[i+1]; k++) {
1753           for (l=0; l<bs; l++) {
1754 #if defined(PETSC_USE_COMPLEX)
1755             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1756               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G + %G i) ",bs*a->j[k]+l,
1757                 PetscRealPart(a->a[bs2*k + l*bs + j]),PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1758             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1759               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G - %G i) ",bs*a->j[k]+l,
1760                 PetscRealPart(a->a[bs2*k + l*bs + j]),-PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1761             } else {
1762               ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1763             }
1764 #else
1765             ierr = PetscViewerASCIIPrintf(viewer," (%D, %G) ",bs*a->j[k]+l,a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1766 #endif
1767           }
1768         }
1769         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1770       }
1771     }
1772     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1773   }
1774   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1775   PetscFunctionReturn(0);
1776 }
1777 
1778 #undef __FUNCT__
1779 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1780 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1781 {
1782   Mat            A = (Mat) Aa;
1783   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1784   PetscErrorCode ierr;
1785   PetscInt       row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1786   PetscReal      xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1787   MatScalar      *aa;
1788   PetscViewer    viewer;
1789   PetscViewerFormat format;
1790 
1791   PetscFunctionBegin;
1792   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1793   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1794 
1795   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1796 
1797   /* loop over matrix elements drawing boxes */
1798 
1799   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1800     color = PETSC_DRAW_BLUE;
1801     for (i=0,row=0; i<mbs; i++,row+=bs) {
1802       for (j=a->i[i]; j<a->i[i+1]; j++) {
1803         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1804         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1805         aa = a->a + j*bs2;
1806         for (k=0; k<bs; k++) {
1807           for (l=0; l<bs; l++) {
1808             if (PetscRealPart(*aa++) >=  0.) continue;
1809             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1810           }
1811         }
1812       }
1813     }
1814     color = PETSC_DRAW_CYAN;
1815     for (i=0,row=0; i<mbs; i++,row+=bs) {
1816       for (j=a->i[i]; j<a->i[i+1]; j++) {
1817         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1818         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1819         aa = a->a + j*bs2;
1820         for (k=0; k<bs; k++) {
1821           for (l=0; l<bs; l++) {
1822             if (PetscRealPart(*aa++) != 0.) continue;
1823             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1824           }
1825         }
1826       }
1827     }
1828     color = PETSC_DRAW_RED;
1829     for (i=0,row=0; i<mbs; i++,row+=bs) {
1830       for (j=a->i[i]; j<a->i[i+1]; j++) {
1831         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1832         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1833         aa = a->a + j*bs2;
1834         for (k=0; k<bs; k++) {
1835           for (l=0; l<bs; l++) {
1836             if (PetscRealPart(*aa++) <= 0.) continue;
1837             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1838           }
1839         }
1840       }
1841     }
1842   } else {
1843     /* use contour shading to indicate magnitude of values */
1844     /* first determine max of all nonzero values */
1845     PetscDraw   popup;
1846     PetscReal scale,maxv = 0.0;
1847 
1848     for (i=0; i<a->nz*a->bs2; i++) {
1849       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1850     }
1851     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1852     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1853     if (popup) {ierr  = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);}
1854     for (i=0,row=0; i<mbs; i++,row+=bs) {
1855       for (j=a->i[i]; j<a->i[i+1]; j++) {
1856         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1857         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1858         aa = a->a + j*bs2;
1859         for (k=0; k<bs; k++) {
1860           for (l=0; l<bs; l++) {
1861             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1862             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1863           }
1864         }
1865       }
1866     }
1867   }
1868   PetscFunctionReturn(0);
1869 }
1870 
1871 #undef __FUNCT__
1872 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1873 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1874 {
1875   PetscErrorCode ierr;
1876   PetscReal      xl,yl,xr,yr,w,h;
1877   PetscDraw      draw;
1878   PetscBool      isnull;
1879 
1880   PetscFunctionBegin;
1881 
1882   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1883   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1884 
1885   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1886   xr  = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1887   xr += w;    yr += h;  xl = -w;     yl = -h;
1888   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1889   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1890   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",PETSC_NULL);CHKERRQ(ierr);
1891   PetscFunctionReturn(0);
1892 }
1893 
1894 #undef __FUNCT__
1895 #define __FUNCT__ "MatView_SeqBAIJ"
1896 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1897 {
1898   PetscErrorCode ierr;
1899   PetscBool      iascii,isbinary,isdraw;
1900 
1901   PetscFunctionBegin;
1902   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1903   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1904   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1905   if (iascii) {
1906     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1907   } else if (isbinary) {
1908     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1909   } else if (isdraw) {
1910     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1911   } else {
1912     Mat B;
1913     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1914     ierr = MatView(B,viewer);CHKERRQ(ierr);
1915     ierr = MatDestroy(&B);CHKERRQ(ierr);
1916   }
1917   PetscFunctionReturn(0);
1918 }
1919 
1920 
1921 #undef __FUNCT__
1922 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1923 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1924 {
1925   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1926   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1927   PetscInt    *ai = a->i,*ailen = a->ilen;
1928   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1929   MatScalar   *ap,*aa = a->a;
1930 
1931   PetscFunctionBegin;
1932   for (k=0; k<m; k++) { /* loop over rows */
1933     row  = im[k]; brow = row/bs;
1934     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1935     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1936     rp   = aj + ai[brow] ; ap = aa + bs2*ai[brow] ;
1937     nrow = ailen[brow];
1938     for (l=0; l<n; l++) { /* loop over columns */
1939       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1940       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1941       col  = in[l] ;
1942       bcol = col/bs;
1943       cidx = col%bs;
1944       ridx = row%bs;
1945       high = nrow;
1946       low  = 0; /* assume unsorted */
1947       while (high-low > 5) {
1948         t = (low+high)/2;
1949         if (rp[t] > bcol) high = t;
1950         else             low  = t;
1951       }
1952       for (i=low; i<high; i++) {
1953         if (rp[i] > bcol) break;
1954         if (rp[i] == bcol) {
1955           *v++ = ap[bs2*i+bs*cidx+ridx];
1956           goto finished;
1957         }
1958       }
1959       *v++ = 0.0;
1960       finished:;
1961     }
1962   }
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 #undef __FUNCT__
1967 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1968 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1969 {
1970   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1971   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1972   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1973   PetscErrorCode    ierr;
1974   PetscInt          *aj=a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1975   PetscBool         roworiented=a->roworiented;
1976   const PetscScalar *value = v;
1977   MatScalar         *ap,*aa = a->a,*bap;
1978 
1979   PetscFunctionBegin;
1980   if (roworiented) {
1981     stepval = (n-1)*bs;
1982   } else {
1983     stepval = (m-1)*bs;
1984   }
1985   for (k=0; k<m; k++) { /* loop over added rows */
1986     row  = im[k];
1987     if (row < 0) continue;
1988 #if defined(PETSC_USE_DEBUG)
1989     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1990 #endif
1991     rp   = aj + ai[row];
1992     ap   = aa + bs2*ai[row];
1993     rmax = imax[row];
1994     nrow = ailen[row];
1995     low  = 0;
1996     high = nrow;
1997     for (l=0; l<n; l++) { /* loop over added columns */
1998       if (in[l] < 0) continue;
1999 #if defined(PETSC_USE_DEBUG)
2000       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
2001 #endif
2002       col = in[l];
2003       if (roworiented) {
2004         value = v + (k*(stepval+bs) + l)*bs;
2005       } else {
2006         value = v + (l*(stepval+bs) + k)*bs;
2007       }
2008       if (col <= lastcol) low = 0; else high = nrow;
2009       lastcol = col;
2010       while (high-low > 7) {
2011         t = (low+high)/2;
2012         if (rp[t] > col) high = t;
2013         else             low  = t;
2014       }
2015       for (i=low; i<high; i++) {
2016         if (rp[i] > col) break;
2017         if (rp[i] == col) {
2018           bap  = ap +  bs2*i;
2019           if (roworiented) {
2020             if (is == ADD_VALUES) {
2021               for (ii=0; ii<bs; ii++,value+=stepval) {
2022                 for (jj=ii; jj<bs2; jj+=bs) {
2023                   bap[jj] += *value++;
2024                 }
2025               }
2026             } else {
2027               for (ii=0; ii<bs; ii++,value+=stepval) {
2028                 for (jj=ii; jj<bs2; jj+=bs) {
2029                   bap[jj] = *value++;
2030                 }
2031               }
2032             }
2033           } else {
2034             if (is == ADD_VALUES) {
2035               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2036                 for (jj=0; jj<bs; jj++) {
2037                   bap[jj] += value[jj];
2038                 }
2039                 bap += bs;
2040               }
2041             } else {
2042               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
2043                 for (jj=0; jj<bs; jj++) {
2044                   bap[jj]  = value[jj];
2045                 }
2046                 bap += bs;
2047               }
2048             }
2049           }
2050           goto noinsert2;
2051         }
2052       }
2053       if (nonew == 1) goto noinsert2;
2054       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2055       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2056       N = nrow++ - 1; high++;
2057       /* shift up all the later entries in this row */
2058       for (ii=N; ii>=i; ii--) {
2059         rp[ii+1] = rp[ii];
2060         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2061       }
2062       if (N >= i) {
2063         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2064       }
2065       rp[i] = col;
2066       bap   = ap +  bs2*i;
2067       if (roworiented) {
2068         for (ii=0; ii<bs; ii++,value+=stepval) {
2069           for (jj=ii; jj<bs2; jj+=bs) {
2070             bap[jj] = *value++;
2071           }
2072         }
2073       } else {
2074         for (ii=0; ii<bs; ii++,value+=stepval) {
2075           for (jj=0; jj<bs; jj++) {
2076             *bap++  = *value++;
2077           }
2078         }
2079       }
2080       noinsert2:;
2081       low = i;
2082     }
2083     ailen[row] = nrow;
2084   }
2085   PetscFunctionReturn(0);
2086 }
2087 
2088 #undef __FUNCT__
2089 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
2090 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
2091 {
2092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2093   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
2094   PetscInt       m = A->rmap->N,*ip,N,*ailen = a->ilen;
2095   PetscErrorCode ierr;
2096   PetscInt       mbs = a->mbs,bs2 = a->bs2,rmax = 0;
2097   MatScalar      *aa = a->a,*ap;
2098   PetscReal      ratio=0.6;
2099 
2100   PetscFunctionBegin;
2101   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
2102 
2103   if (m) rmax = ailen[0];
2104   for (i=1; i<mbs; i++) {
2105     /* move each row back by the amount of empty slots (fshift) before it*/
2106     fshift += imax[i-1] - ailen[i-1];
2107     rmax   = PetscMax(rmax,ailen[i]);
2108     if (fshift) {
2109       ip = aj + ai[i]; ap = aa + bs2*ai[i];
2110       N = ailen[i];
2111       for (j=0; j<N; j++) {
2112         ip[j-fshift] = ip[j];
2113         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2114       }
2115     }
2116     ai[i] = ai[i-1] + ailen[i-1];
2117   }
2118   if (mbs) {
2119     fshift += imax[mbs-1] - ailen[mbs-1];
2120     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
2121   }
2122   /* reset ilen and imax for each row */
2123   for (i=0; i<mbs; i++) {
2124     ailen[i] = imax[i] = ai[i+1] - ai[i];
2125   }
2126   a->nz = ai[mbs];
2127 
2128   /* diagonals may have moved, so kill the diagonal pointers */
2129   a->idiagvalid = PETSC_FALSE;
2130   if (fshift && a->diag) {
2131     ierr = PetscFree(a->diag);CHKERRQ(ierr);
2132     ierr = PetscLogObjectMemory(A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
2133     a->diag = 0;
2134   }
2135   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
2136   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
2137   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
2138   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
2139   A->info.mallocs     += a->reallocs;
2140   a->reallocs          = 0;
2141   A->info.nz_unneeded  = (PetscReal)fshift*bs2;
2142 
2143   ierr = MatCheckCompressedRow(A,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
2144   A->same_nonzero = PETSC_TRUE;
2145   PetscFunctionReturn(0);
2146 }
2147 
2148 /*
2149    This function returns an array of flags which indicate the locations of contiguous
2150    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
2151    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
2152    Assume: sizes should be long enough to hold all the values.
2153 */
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
2156 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
2157 {
2158   PetscInt   i,j,k,row;
2159   PetscBool  flg;
2160 
2161   PetscFunctionBegin;
2162   for (i=0,j=0; i<n; j++) {
2163     row = idx[i];
2164     if (row%bs!=0) { /* Not the begining of a block */
2165       sizes[j] = 1;
2166       i++;
2167     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
2168       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
2169       i++;
2170     } else { /* Begining of the block, so check if the complete block exists */
2171       flg = PETSC_TRUE;
2172       for (k=1; k<bs; k++) {
2173         if (row+k != idx[i+k]) { /* break in the block */
2174           flg = PETSC_FALSE;
2175           break;
2176         }
2177       }
2178       if (flg) { /* No break in the bs */
2179         sizes[j] = bs;
2180         i+= bs;
2181       } else {
2182         sizes[j] = 1;
2183         i++;
2184       }
2185     }
2186   }
2187   *bs_max = j;
2188   PetscFunctionReturn(0);
2189 }
2190 
2191 #undef __FUNCT__
2192 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
2193 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2194 {
2195   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2196   PetscErrorCode    ierr;
2197   PetscInt          i,j,k,count,*rows;
2198   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2199   PetscScalar       zero = 0.0;
2200   MatScalar         *aa;
2201   const PetscScalar *xx;
2202   PetscScalar       *bb;
2203 
2204   PetscFunctionBegin;
2205   /* fix right hand side if needed */
2206   if (x && b) {
2207     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2208     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2209     for (i=0; i<is_n; i++) {
2210       bb[is_idx[i]] = diag*xx[is_idx[i]];
2211     }
2212     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2213     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2214   }
2215 
2216   /* Make a copy of the IS and  sort it */
2217   /* allocate memory for rows,sizes */
2218   ierr  = PetscMalloc2(is_n,PetscInt,&rows,2*is_n,PetscInt,&sizes);CHKERRQ(ierr);
2219 
2220   /* copy IS values to rows, and sort them */
2221   for (i=0; i<is_n; i++) { rows[i] = is_idx[i]; }
2222   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2223 
2224   if (baij->keepnonzeropattern) {
2225     for (i=0; i<is_n; i++) { sizes[i] = 1; }
2226     bs_max = is_n;
2227     A->same_nonzero = PETSC_TRUE;
2228   } else {
2229     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2230     A->same_nonzero = PETSC_FALSE;
2231   }
2232 
2233   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2234     row   = rows[j];
2235     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2236     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2237     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2238     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2239       if (diag != (PetscScalar)0.0) {
2240         if (baij->ilen[row/bs] > 0) {
2241           baij->ilen[row/bs]       = 1;
2242           baij->j[baij->i[row/bs]] = row/bs;
2243           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2244         }
2245         /* Now insert all the diagonal values for this bs */
2246         for (k=0; k<bs; k++) {
2247           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2248         }
2249       } else { /* (diag == 0.0) */
2250         baij->ilen[row/bs] = 0;
2251       } /* end (diag == 0.0) */
2252     } else { /* (sizes[i] != bs) */
2253 #if defined (PETSC_USE_DEBUG)
2254       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2255 #endif
2256       for (k=0; k<count; k++) {
2257         aa[0] =  zero;
2258         aa    += bs;
2259       }
2260       if (diag != (PetscScalar)0.0) {
2261         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2262       }
2263     }
2264   }
2265 
2266   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2267   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2273 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2274 {
2275   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2276   PetscErrorCode    ierr;
2277   PetscInt          i,j,k,count;
2278   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,row,col;
2279   PetscScalar       zero = 0.0;
2280   MatScalar         *aa;
2281   const PetscScalar *xx;
2282   PetscScalar       *bb;
2283   PetscBool         *zeroed,vecs = PETSC_FALSE;
2284 
2285   PetscFunctionBegin;
2286   /* fix right hand side if needed */
2287   if (x && b) {
2288     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2289     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2290     vecs = PETSC_TRUE;
2291   }
2292   A->same_nonzero = PETSC_TRUE;
2293 
2294   /* zero the columns */
2295   ierr = PetscMalloc(A->rmap->n*sizeof(PetscBool),&zeroed);CHKERRQ(ierr);
2296   ierr = PetscMemzero(zeroed,A->rmap->n*sizeof(PetscBool));CHKERRQ(ierr);
2297   for (i=0; i<is_n; i++) {
2298     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2299     zeroed[is_idx[i]] = PETSC_TRUE;
2300   }
2301   for (i=0; i<A->rmap->N; i++) {
2302     if (!zeroed[i]) {
2303       row = i/bs;
2304       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2305         for (k=0; k<bs; k++) {
2306           col = bs*baij->j[j] + k;
2307           if (zeroed[col]) {
2308             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2309             if (vecs) bb[i] -= aa[0]*xx[col];
2310             aa[0] = 0.0;
2311           }
2312         }
2313       }
2314     } else if (vecs) bb[i] = diag*xx[i];
2315   }
2316   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2317   if (vecs) {
2318     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2319     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2320   }
2321 
2322   /* zero the rows */
2323   for (i=0; i<is_n; i++) {
2324     row   = is_idx[i];
2325     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2326     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2327     for (k=0; k<count; k++) {
2328       aa[0] =  zero;
2329       aa    += bs;
2330     }
2331     if (diag != (PetscScalar)0.0) {
2332       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2333     }
2334   }
2335   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2336   PetscFunctionReturn(0);
2337 }
2338 
2339 #undef __FUNCT__
2340 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2341 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2342 {
2343   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2344   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2345   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2346   PetscInt       *aj=a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2347   PetscErrorCode ierr;
2348   PetscInt       ridx,cidx,bs2=a->bs2;
2349   PetscBool      roworiented=a->roworiented;
2350   MatScalar      *ap,value,*aa=a->a,*bap;
2351 
2352   PetscFunctionBegin;
2353   if (v) PetscValidScalarPointer(v,6);
2354   for (k=0; k<m; k++) { /* loop over added rows */
2355     row  = im[k];
2356     brow = row/bs;
2357     if (row < 0) continue;
2358 #if defined(PETSC_USE_DEBUG)
2359     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2360 #endif
2361     rp   = aj + ai[brow];
2362     ap   = aa + bs2*ai[brow];
2363     rmax = imax[brow];
2364     nrow = ailen[brow];
2365     low  = 0;
2366     high = nrow;
2367     for (l=0; l<n; l++) { /* loop over added columns */
2368       if (in[l] < 0) continue;
2369 #if defined(PETSC_USE_DEBUG)
2370       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2371 #endif
2372       col = in[l]; bcol = col/bs;
2373       ridx = row % bs; cidx = col % bs;
2374       if (roworiented) {
2375         value = v[l + k*n];
2376       } else {
2377         value = v[k + l*m];
2378       }
2379       if (col <= lastcol) low = 0; else high = nrow;
2380       lastcol = col;
2381       while (high-low > 7) {
2382         t = (low+high)/2;
2383         if (rp[t] > bcol) high = t;
2384         else              low  = t;
2385       }
2386       for (i=low; i<high; i++) {
2387         if (rp[i] > bcol) break;
2388         if (rp[i] == bcol) {
2389           bap  = ap +  bs2*i + bs*cidx + ridx;
2390           if (is == ADD_VALUES) *bap += value;
2391           else                  *bap  = value;
2392           goto noinsert1;
2393         }
2394       }
2395       if (nonew == 1) goto noinsert1;
2396       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2397       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2398       N = nrow++ - 1; high++;
2399       /* shift up all the later entries in this row */
2400       for (ii=N; ii>=i; ii--) {
2401         rp[ii+1] = rp[ii];
2402         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2403       }
2404       if (N>=i) {
2405         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2406       }
2407       rp[i]                      = bcol;
2408       ap[bs2*i + bs*cidx + ridx] = value;
2409       a->nz++;
2410       noinsert1:;
2411       low = i;
2412     }
2413     ailen[brow] = nrow;
2414   }
2415   A->same_nonzero = PETSC_FALSE;
2416   PetscFunctionReturn(0);
2417 }
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2421 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2422 {
2423   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2424   Mat            outA;
2425   PetscErrorCode ierr;
2426   PetscBool      row_identity,col_identity;
2427 
2428   PetscFunctionBegin;
2429   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2430   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2431   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2432   if (!row_identity || !col_identity) {
2433     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2434   }
2435 
2436   outA            = inA;
2437   inA->factortype = MAT_FACTOR_LU;
2438 
2439   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2440 
2441   ierr = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2442   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
2443   a->row = row;
2444   ierr = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2445   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
2446   a->col = col;
2447 
2448   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2449   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2450    ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2451   ierr = PetscLogObjectParent(inA,a->icol);CHKERRQ(ierr);
2452 
2453   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2454   if (!a->solve_work) {
2455     ierr = PetscMalloc((inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar),&a->solve_work);CHKERRQ(ierr);
2456     ierr = PetscLogObjectMemory(inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2457   }
2458   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2459 
2460   PetscFunctionReturn(0);
2461 }
2462 
2463 EXTERN_C_BEGIN
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2466 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2467 {
2468   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)mat->data;
2469   PetscInt    i,nz,mbs;
2470 
2471   PetscFunctionBegin;
2472   nz  = baij->maxnz;
2473   mbs = baij->mbs;
2474   for (i=0; i<nz; i++) {
2475     baij->j[i] = indices[i];
2476   }
2477   baij->nz = nz;
2478   for (i=0; i<mbs; i++) {
2479     baij->ilen[i] = baij->imax[i];
2480   }
2481   PetscFunctionReturn(0);
2482 }
2483 EXTERN_C_END
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2487 /*@
2488     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2489        in the matrix.
2490 
2491   Input Parameters:
2492 +  mat - the SeqBAIJ matrix
2493 -  indices - the column indices
2494 
2495   Level: advanced
2496 
2497   Notes:
2498     This can be called if you have precomputed the nonzero structure of the
2499   matrix and want to provide it to the matrix object to improve the performance
2500   of the MatSetValues() operation.
2501 
2502     You MUST have set the correct numbers of nonzeros per row in the call to
2503   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2504 
2505     MUST be called before any calls to MatSetValues();
2506 
2507 @*/
2508 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2509 {
2510   PetscErrorCode ierr;
2511 
2512   PetscFunctionBegin;
2513   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2514   PetscValidPointer(indices,2);
2515   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt *),(mat,indices));CHKERRQ(ierr);
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 #undef __FUNCT__
2520 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2521 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2522 {
2523   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2524   PetscErrorCode ierr;
2525   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2526   PetscReal      atmp;
2527   PetscScalar    *x,zero = 0.0;
2528   MatScalar      *aa;
2529   PetscInt       ncols,brow,krow,kcol;
2530 
2531   PetscFunctionBegin;
2532   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2533   bs   = A->rmap->bs;
2534   aa   = a->a;
2535   ai   = a->i;
2536   aj   = a->j;
2537   mbs  = a->mbs;
2538 
2539   ierr = VecSet(v,zero);CHKERRQ(ierr);
2540   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2541   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2542   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2543   for (i=0; i<mbs; i++) {
2544     ncols = ai[1] - ai[0]; ai++;
2545     brow  = bs*i;
2546     for (j=0; j<ncols; j++) {
2547       for (kcol=0; kcol<bs; kcol++) {
2548         for (krow=0; krow<bs; krow++) {
2549           atmp = PetscAbsScalar(*aa);aa++;
2550           row = brow + krow;    /* row index */
2551           /* printf("val[%d,%d]: %G\n",row,bcol+kcol,atmp); */
2552           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2553         }
2554       }
2555       aj++;
2556     }
2557   }
2558   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2559   PetscFunctionReturn(0);
2560 }
2561 
2562 #undef __FUNCT__
2563 #define __FUNCT__ "MatCopy_SeqBAIJ"
2564 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2565 {
2566   PetscErrorCode ierr;
2567 
2568   PetscFunctionBegin;
2569   /* If the two matrices have the same copy implementation, use fast copy. */
2570   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2571     Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2572     Mat_SeqBAIJ *b = (Mat_SeqBAIJ*)B->data;
2573     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2574 
2575     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2576     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2577     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2578   } else {
2579     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2580   }
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2586 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2587 {
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2592   PetscFunctionReturn(0);
2593 }
2594 
2595 #undef __FUNCT__
2596 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2597 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2598 {
2599   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2600   PetscFunctionBegin;
2601   *array = a->a;
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 #undef __FUNCT__
2606 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2607 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2608 {
2609   PetscFunctionBegin;
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 #undef __FUNCT__
2614 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2615 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2616 {
2617   Mat_SeqBAIJ    *x  = (Mat_SeqBAIJ *)X->data,*y = (Mat_SeqBAIJ *)Y->data;
2618   PetscErrorCode ierr;
2619   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2620   PetscBLASInt   one=1;
2621 
2622   PetscFunctionBegin;
2623   if (str == SAME_NONZERO_PATTERN) {
2624     PetscScalar alpha = a;
2625     PetscBLASInt bnz = PetscBLASIntCast(x->nz*bs2);
2626     BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one);
2627   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2628     if (y->xtoy && y->XtoY != X) {
2629       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2630       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2631     }
2632     if (!y->xtoy) { /* get xtoy */
2633       ierr = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,PETSC_NULL, y->i,y->j,PETSC_NULL, &y->xtoy);CHKERRQ(ierr);
2634       y->XtoY = X;
2635       ierr = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2636     }
2637     for (i=0; i<x->nz; i++) {
2638       j = 0;
2639       while (j < bs2) {
2640         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2641         j++;
2642       }
2643     }
2644     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %G\n",bs2*x->nz,bs2*y->nz,(PetscReal)(bs2*x->nz)/(bs2*y->nz));CHKERRQ(ierr);
2645   } else {
2646     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2647   }
2648   PetscFunctionReturn(0);
2649 }
2650 
2651 #undef __FUNCT__
2652 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2653 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2654 {
2655   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2656   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2657   MatScalar      *aa = a->a;
2658 
2659   PetscFunctionBegin;
2660   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2661   PetscFunctionReturn(0);
2662 }
2663 
2664 #undef __FUNCT__
2665 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2666 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2667 {
2668   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2669   PetscInt       i,nz = a->bs2*a->i[a->mbs];
2670   MatScalar      *aa = a->a;
2671 
2672   PetscFunctionBegin;
2673   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2674   PetscFunctionReturn(0);
2675 }
2676 
2677 extern PetscErrorCode MatFDColoringCreate_SeqAIJ(Mat,ISColoring,MatFDColoring);
2678 
2679 #undef __FUNCT__
2680 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2681 /*
2682     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2683 */
2684 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2685 {
2686   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2687   PetscErrorCode ierr;
2688   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2689   PetscInt       nz = a->i[m],row,*jj,mr,col;
2690 
2691   PetscFunctionBegin;
2692   *nn = n;
2693   if (!ia) PetscFunctionReturn(0);
2694   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2695   else {
2696     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&collengths);CHKERRQ(ierr);
2697     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2698     ierr = PetscMalloc((n+1)*sizeof(PetscInt),&cia);CHKERRQ(ierr);
2699     ierr = PetscMalloc((nz+1)*sizeof(PetscInt),&cja);CHKERRQ(ierr);
2700     jj = a->j;
2701     for (i=0; i<nz; i++) {
2702       collengths[jj[i]]++;
2703     }
2704     cia[0] = oshift;
2705     for (i=0; i<n; i++) {
2706       cia[i+1] = cia[i] + collengths[i];
2707     }
2708     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2709     jj   = a->j;
2710     for (row=0; row<m; row++) {
2711       mr = a->i[row+1] - a->i[row];
2712       for (i=0; i<mr; i++) {
2713         col = *jj++;
2714         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2715       }
2716     }
2717     ierr = PetscFree(collengths);CHKERRQ(ierr);
2718     *ia = cia; *ja = cja;
2719   }
2720   PetscFunctionReturn(0);
2721 }
2722 
2723 #undef __FUNCT__
2724 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2725 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool  symmetric,PetscBool  inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2726 {
2727   PetscErrorCode ierr;
2728 
2729   PetscFunctionBegin;
2730   if (!ia) PetscFunctionReturn(0);
2731   ierr = PetscFree(*ia);CHKERRQ(ierr);
2732   ierr = PetscFree(*ja);CHKERRQ(ierr);
2733   PetscFunctionReturn(0);
2734 }
2735 
2736 #undef __FUNCT__
2737 #define __FUNCT__ "MatFDColoringApply_BAIJ"
2738 PetscErrorCode  MatFDColoringApply_BAIJ(Mat J,MatFDColoring coloring,Vec x1,MatStructure *flag,void *sctx)
2739 {
2740   PetscErrorCode (*f)(void*,Vec,Vec,void*) = (PetscErrorCode (*)(void*,Vec,Vec,void *))coloring->f;
2741   PetscErrorCode ierr;
2742   PetscInt       bs = J->rmap->bs,i,j,k,start,end,l,row,col,*srows,**vscaleforrow;
2743   PetscScalar    dx,*y,*xx,*w3_array;
2744   PetscScalar    *vscale_array;
2745   PetscReal      epsilon = coloring->error_rel,umin = coloring->umin,unorm;
2746   Vec            w1=coloring->w1,w2=coloring->w2,w3;
2747   void           *fctx = coloring->fctx;
2748   PetscBool      flg = PETSC_FALSE;
2749   PetscInt       ctype=coloring->ctype,N,col_start=0,col_end=0;
2750   Vec            x1_tmp;
2751 
2752   PetscFunctionBegin;
2753   PetscValidHeaderSpecific(J,MAT_CLASSID,1);
2754   PetscValidHeaderSpecific(coloring,MAT_FDCOLORING_CLASSID,2);
2755   PetscValidHeaderSpecific(x1,VEC_CLASSID,3);
2756   if (!f) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatFDColoringSetFunction()");
2757 
2758   ierr = PetscLogEventBegin(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2759   ierr = MatSetUnfactored(J);CHKERRQ(ierr);
2760   ierr = PetscOptionsGetBool(PETSC_NULL,"-mat_fd_coloring_dont_rezero",&flg,PETSC_NULL);CHKERRQ(ierr);
2761   if (flg) {
2762     ierr = PetscInfo(coloring,"Not calling MatZeroEntries()\n");CHKERRQ(ierr);
2763   } else {
2764     PetscBool  assembled;
2765     ierr = MatAssembled(J,&assembled);CHKERRQ(ierr);
2766     if (assembled) {
2767       ierr = MatZeroEntries(J);CHKERRQ(ierr);
2768     }
2769   }
2770 
2771   x1_tmp = x1;
2772   if (!coloring->vscale) {
2773     ierr = VecDuplicate(x1_tmp,&coloring->vscale);CHKERRQ(ierr);
2774   }
2775 
2776   if (coloring->htype[0] == 'w') { /* tacky test; need to make systematic if we add other approaches to computing h*/
2777     ierr = VecNorm(x1_tmp,NORM_2,&unorm);CHKERRQ(ierr);
2778   }
2779   ierr = VecGetOwnershipRange(w1,&start,&end);CHKERRQ(ierr); /* OwnershipRange is used by ghosted x! */
2780 
2781   /* Set w1 = F(x1) */
2782   if (!coloring->fset) {
2783     ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2784     ierr = (*f)(sctx,x1_tmp,w1,fctx);CHKERRQ(ierr);
2785     ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2786   } else {
2787     coloring->fset = PETSC_FALSE;
2788   }
2789 
2790   if (!coloring->w3) {
2791     ierr = VecDuplicate(x1_tmp,&coloring->w3);CHKERRQ(ierr);
2792     ierr = PetscLogObjectParent(coloring,coloring->w3);CHKERRQ(ierr);
2793   }
2794   w3 = coloring->w3;
2795 
2796     CHKMEMQ;
2797     /* Compute all the local scale factors, including ghost points */
2798   ierr = VecGetLocalSize(x1_tmp,&N);CHKERRQ(ierr);
2799   ierr = VecGetArray(x1_tmp,&xx);CHKERRQ(ierr);
2800   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2801   if (ctype == IS_COLORING_GHOSTED) {
2802     col_start = 0; col_end = N;
2803   } else if (ctype == IS_COLORING_GLOBAL) {
2804     xx = xx - start;
2805     vscale_array = vscale_array - start;
2806     col_start = start; col_end = N + start;
2807   }    CHKMEMQ;
2808   for (col=col_start; col<col_end; col++) {
2809     /* Loop over each local column, vscale[col] = 1./(epsilon*dx[col]) */
2810     if (coloring->htype[0] == 'w') {
2811       dx = 1.0 + unorm;
2812     } else {
2813       dx  = xx[col];
2814     }
2815     if (dx == (PetscScalar)0.0) dx = 1.0;
2816     if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2817     else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2818     dx               *= epsilon;
2819     vscale_array[col] = (PetscScalar)1.0/dx;
2820   }     CHKMEMQ;
2821   if (ctype == IS_COLORING_GLOBAL)  vscale_array = vscale_array + start;
2822   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2823   if (ctype == IS_COLORING_GLOBAL) {
2824     ierr = VecGhostUpdateBegin(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2825     ierr = VecGhostUpdateEnd(coloring->vscale,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2826   }
2827   CHKMEMQ;
2828   if (coloring->vscaleforrow) {
2829     vscaleforrow = coloring->vscaleforrow;
2830   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_NULL,"Null Object: coloring->vscaleforrow");
2831 
2832   ierr = PetscMalloc(bs*sizeof(PetscInt),&srows);CHKERRQ(ierr);
2833   /*
2834     Loop over each color
2835   */
2836   ierr = VecGetArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2837   for (k=0; k<coloring->ncolors; k++) {
2838     coloring->currentcolor = k;
2839     for (i=0; i<bs; i++) {
2840       ierr = VecCopy(x1_tmp,w3);CHKERRQ(ierr);
2841       ierr = VecGetArray(w3,&w3_array);CHKERRQ(ierr);
2842       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array - start;
2843       /*
2844         Loop over each column associated with color
2845         adding the perturbation to the vector w3.
2846       */
2847       for (l=0; l<coloring->ncolumns[k]; l++) {
2848         col = i + bs*coloring->columns[k][l];    /* local column of the matrix we are probing for */
2849         if (coloring->htype[0] == 'w') {
2850           dx = 1.0 + unorm;
2851         } else {
2852           dx  = xx[col];
2853         }
2854         if (dx == (PetscScalar)0.0) dx = 1.0;
2855         if (PetscAbsScalar(dx) < umin && PetscRealPart(dx) >= 0.0)     dx = umin;
2856         else if (PetscRealPart(dx) < 0.0 && PetscAbsScalar(dx) < umin) dx = -umin;
2857         dx            *= epsilon;
2858         if (!PetscAbsScalar(dx)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Computed 0 differencing parameter");
2859         w3_array[col] += dx;
2860       }
2861       if (ctype == IS_COLORING_GLOBAL) w3_array = w3_array + start;
2862       ierr = VecRestoreArray(w3,&w3_array);CHKERRQ(ierr);
2863 
2864       /*
2865         Evaluate function at w3 = x1 + dx (here dx is a vector of perturbations)
2866         w2 = F(x1 + dx) - F(x1)
2867       */
2868       ierr = PetscLogEventBegin(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2869       ierr = (*f)(sctx,w3,w2,fctx);CHKERRQ(ierr);
2870       ierr = PetscLogEventEnd(MAT_FDColoringFunction,0,0,0,0);CHKERRQ(ierr);
2871       ierr = VecAXPY(w2,-1.0,w1);CHKERRQ(ierr);
2872 
2873       /*
2874         Loop over rows of vector, putting results into Jacobian matrix
2875       */
2876       ierr = VecGetArray(w2,&y);CHKERRQ(ierr);
2877       for (l=0; l<coloring->nrows[k]; l++) {
2878         row    = bs*coloring->rows[k][l];             /* local row index */
2879         col    = i + bs*coloring->columnsforrow[k][l];    /* global column index */
2880         for (j=0; j<bs; j++) {
2881           y[row+j] *= vscale_array[j+bs*vscaleforrow[k][l]];
2882           srows[j]  = row + start + j;
2883         }
2884         ierr   = MatSetValues(J,bs,srows,1,&col,y+row,INSERT_VALUES);CHKERRQ(ierr);
2885       }
2886       ierr = VecRestoreArray(w2,&y);CHKERRQ(ierr);
2887     }
2888   } /* endof for each color */
2889   if (ctype == IS_COLORING_GLOBAL) xx = xx + start;
2890   ierr = VecRestoreArray(coloring->vscale,&vscale_array);CHKERRQ(ierr);
2891   ierr = VecRestoreArray(x1_tmp,&xx);CHKERRQ(ierr);
2892   ierr = PetscFree(srows);CHKERRQ(ierr);
2893 
2894   coloring->currentcolor = -1;
2895   ierr  = MatAssemblyBegin(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2896   ierr  = MatAssemblyEnd(J,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2897   ierr = PetscLogEventEnd(MAT_FDColoringApply,coloring,J,x1,0);CHKERRQ(ierr);
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 /* -------------------------------------------------------------------*/
2902 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2903        MatGetRow_SeqBAIJ,
2904        MatRestoreRow_SeqBAIJ,
2905        MatMult_SeqBAIJ_N,
2906 /* 4*/ MatMultAdd_SeqBAIJ_N,
2907        MatMultTranspose_SeqBAIJ,
2908        MatMultTransposeAdd_SeqBAIJ,
2909        0,
2910        0,
2911        0,
2912 /*10*/ 0,
2913        MatLUFactor_SeqBAIJ,
2914        0,
2915        0,
2916        MatTranspose_SeqBAIJ,
2917 /*15*/ MatGetInfo_SeqBAIJ,
2918        MatEqual_SeqBAIJ,
2919        MatGetDiagonal_SeqBAIJ,
2920        MatDiagonalScale_SeqBAIJ,
2921        MatNorm_SeqBAIJ,
2922 /*20*/ 0,
2923        MatAssemblyEnd_SeqBAIJ,
2924        MatSetOption_SeqBAIJ,
2925        MatZeroEntries_SeqBAIJ,
2926 /*24*/ MatZeroRows_SeqBAIJ,
2927        0,
2928        0,
2929        0,
2930        0,
2931 /*29*/ MatSetUp_SeqBAIJ,
2932        0,
2933        0,
2934        0,
2935        0,
2936 /*34*/ MatDuplicate_SeqBAIJ,
2937        0,
2938        0,
2939        MatILUFactor_SeqBAIJ,
2940        0,
2941 /*39*/ MatAXPY_SeqBAIJ,
2942        MatGetSubMatrices_SeqBAIJ,
2943        MatIncreaseOverlap_SeqBAIJ,
2944        MatGetValues_SeqBAIJ,
2945        MatCopy_SeqBAIJ,
2946 /*44*/ 0,
2947        MatScale_SeqBAIJ,
2948        0,
2949        0,
2950        MatZeroRowsColumns_SeqBAIJ,
2951 /*49*/ 0,
2952        MatGetRowIJ_SeqBAIJ,
2953        MatRestoreRowIJ_SeqBAIJ,
2954        MatGetColumnIJ_SeqBAIJ,
2955        MatRestoreColumnIJ_SeqBAIJ,
2956 /*54*/ MatFDColoringCreate_SeqAIJ,
2957        0,
2958        0,
2959        0,
2960        MatSetValuesBlocked_SeqBAIJ,
2961 /*59*/ MatGetSubMatrix_SeqBAIJ,
2962        MatDestroy_SeqBAIJ,
2963        MatView_SeqBAIJ,
2964        0,
2965        0,
2966 /*64*/ 0,
2967        0,
2968        0,
2969        0,
2970        0,
2971 /*69*/ MatGetRowMaxAbs_SeqBAIJ,
2972        0,
2973        MatConvert_Basic,
2974        0,
2975        0,
2976 /*74*/ 0,
2977        MatFDColoringApply_BAIJ,
2978        0,
2979        0,
2980        0,
2981 /*79*/ 0,
2982        0,
2983        0,
2984        0,
2985        MatLoad_SeqBAIJ,
2986 /*84*/ 0,
2987        0,
2988        0,
2989        0,
2990        0,
2991 /*89*/ 0,
2992        0,
2993        0,
2994        0,
2995        0,
2996 /*94*/ 0,
2997        0,
2998        0,
2999        0,
3000        0,
3001 /*99*/0,
3002        0,
3003        0,
3004        0,
3005        0,
3006 /*104*/0,
3007        MatRealPart_SeqBAIJ,
3008        MatImaginaryPart_SeqBAIJ,
3009        0,
3010        0,
3011 /*109*/0,
3012        0,
3013        0,
3014        0,
3015        MatMissingDiagonal_SeqBAIJ,
3016 /*114*/0,
3017        0,
3018        0,
3019        0,
3020        0,
3021 /*119*/0,
3022        0,
3023        MatMultHermitianTranspose_SeqBAIJ,
3024        MatMultHermitianTransposeAdd_SeqBAIJ,
3025        0,
3026 /*124*/0,
3027        0,
3028        MatInvertBlockDiagonal_SeqBAIJ
3029 };
3030 
3031 EXTERN_C_BEGIN
3032 #undef __FUNCT__
3033 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
3034 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
3035 {
3036   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3037   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3038   PetscErrorCode ierr;
3039 
3040   PetscFunctionBegin;
3041   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3042 
3043   /* allocate space for values if not already there */
3044   if (!aij->saved_values) {
3045     ierr = PetscMalloc((nz+1)*sizeof(PetscScalar),&aij->saved_values);CHKERRQ(ierr);
3046     ierr = PetscLogObjectMemory(mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
3047   }
3048 
3049   /* copy values over */
3050   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3051   PetscFunctionReturn(0);
3052 }
3053 EXTERN_C_END
3054 
3055 EXTERN_C_BEGIN
3056 #undef __FUNCT__
3057 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
3058 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
3059 {
3060   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ *)mat->data;
3061   PetscErrorCode ierr;
3062   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
3063 
3064   PetscFunctionBegin;
3065   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
3066   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
3067 
3068   /* copy values over */
3069   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
3070   PetscFunctionReturn(0);
3071 }
3072 EXTERN_C_END
3073 
3074 EXTERN_C_BEGIN
3075 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
3076 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
3077 EXTERN_C_END
3078 
3079 EXTERN_C_BEGIN
3080 #undef __FUNCT__
3081 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
3082 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
3083 {
3084   Mat_SeqBAIJ    *b;
3085   PetscErrorCode ierr;
3086   PetscInt       i,mbs,nbs,bs2;
3087   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
3088 
3089   PetscFunctionBegin;
3090   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
3091   if (nz == MAT_SKIP_ALLOCATION) {
3092     skipallocation = PETSC_TRUE;
3093     nz             = 0;
3094   }
3095 
3096   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3097   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3098   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3099   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3100   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3101 
3102   B->preallocated = PETSC_TRUE;
3103 
3104   mbs  = B->rmap->n/bs;
3105   nbs  = B->cmap->n/bs;
3106   bs2  = bs*bs;
3107 
3108   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
3109 
3110   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3111   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
3112   if (nnz) {
3113     for (i=0; i<mbs; i++) {
3114       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
3115       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
3116     }
3117   }
3118 
3119   b       = (Mat_SeqBAIJ*)B->data;
3120   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
3121     ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",PETSC_NULL,PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
3122   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3123 
3124   if (!flg) {
3125     switch (bs) {
3126     case 1:
3127       B->ops->mult            = MatMult_SeqBAIJ_1;
3128       B->ops->multadd         = MatMultAdd_SeqBAIJ_1;
3129       B->ops->sor             = MatSOR_SeqBAIJ_1;
3130       break;
3131     case 2:
3132       B->ops->mult            = MatMult_SeqBAIJ_2;
3133       B->ops->multadd         = MatMultAdd_SeqBAIJ_2;
3134       B->ops->sor             = MatSOR_SeqBAIJ_2;
3135       break;
3136     case 3:
3137       B->ops->mult            = MatMult_SeqBAIJ_3;
3138       B->ops->multadd         = MatMultAdd_SeqBAIJ_3;
3139       B->ops->sor             = MatSOR_SeqBAIJ_3;
3140       break;
3141     case 4:
3142       B->ops->mult            = MatMult_SeqBAIJ_4;
3143       B->ops->multadd         = MatMultAdd_SeqBAIJ_4;
3144       B->ops->sor             = MatSOR_SeqBAIJ_4;
3145       break;
3146     case 5:
3147       B->ops->mult            = MatMult_SeqBAIJ_5;
3148       B->ops->multadd         = MatMultAdd_SeqBAIJ_5;
3149       B->ops->sor             = MatSOR_SeqBAIJ_5;
3150       break;
3151     case 6:
3152       B->ops->mult            = MatMult_SeqBAIJ_6;
3153       B->ops->multadd         = MatMultAdd_SeqBAIJ_6;
3154       B->ops->sor             = MatSOR_SeqBAIJ_6;
3155       break;
3156     case 7:
3157       B->ops->mult            = MatMult_SeqBAIJ_7;
3158       B->ops->multadd         = MatMultAdd_SeqBAIJ_7;
3159       B->ops->sor             = MatSOR_SeqBAIJ_7;
3160       break;
3161     case 15:
3162       B->ops->mult            = MatMult_SeqBAIJ_15_ver1;
3163       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3164       B->ops->sor             = MatSOR_SeqBAIJ_N;
3165       break;
3166     default:
3167       B->ops->mult            = MatMult_SeqBAIJ_N;
3168       B->ops->multadd         = MatMultAdd_SeqBAIJ_N;
3169       B->ops->sor             = MatSOR_SeqBAIJ_N;
3170       break;
3171     }
3172   }
3173   b->mbs       = mbs;
3174   b->nbs       = nbs;
3175   if (!skipallocation) {
3176     if (!b->imax) {
3177       ierr = PetscMalloc2(mbs,PetscInt,&b->imax,mbs,PetscInt,&b->ilen);CHKERRQ(ierr);
3178       ierr = PetscLogObjectMemory(B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3179       b->free_imax_ilen = PETSC_TRUE;
3180     }
3181     /* b->ilen will count nonzeros in each block row so far. */
3182     for (i=0; i<mbs; i++) { b->ilen[i] = 0;}
3183     if (!nnz) {
3184       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
3185       else if (nz < 0) nz = 1;
3186       for (i=0; i<mbs; i++) b->imax[i] = nz;
3187       nz = nz*mbs;
3188     } else {
3189       nz = 0;
3190       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
3191     }
3192 
3193     /* allocate the matrix space */
3194     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
3195     ierr = PetscMalloc3(bs2*nz,PetscScalar,&b->a,nz,PetscInt,&b->j,B->rmap->N+1,PetscInt,&b->i);CHKERRQ(ierr);
3196     ierr = PetscLogObjectMemory(B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
3197     ierr  = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
3198     ierr  = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3199     b->singlemalloc = PETSC_TRUE;
3200     b->i[0] = 0;
3201     for (i=1; i<mbs+1; i++) {
3202       b->i[i] = b->i[i-1] + b->imax[i-1];
3203     }
3204     b->free_a     = PETSC_TRUE;
3205     b->free_ij    = PETSC_TRUE;
3206   } else {
3207     b->free_a     = PETSC_FALSE;
3208     b->free_ij    = PETSC_FALSE;
3209   }
3210 
3211   b->bs2              = bs2;
3212   b->mbs              = mbs;
3213   b->nz               = 0;
3214   b->maxnz            = nz;
3215   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3216   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3217   PetscFunctionReturn(0);
3218 }
3219 EXTERN_C_END
3220 
3221 EXTERN_C_BEGIN
3222 #undef __FUNCT__
3223 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
3224 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3225 {
3226   PetscInt       i,m,nz,nz_max=0,*nnz;
3227   PetscScalar    *values=0;
3228   PetscErrorCode ierr;
3229 
3230   PetscFunctionBegin;
3231   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3232   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3233   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3234   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3235   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3236   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3237   m = B->rmap->n/bs;
3238 
3239   if (ii[0] != 0) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]); }
3240   ierr = PetscMalloc((m+1) * sizeof(PetscInt), &nnz);CHKERRQ(ierr);
3241   for (i=0; i<m; i++) {
3242     nz = ii[i+1]- ii[i];
3243     if (nz < 0) { SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz); }
3244     nz_max = PetscMax(nz_max, nz);
3245     nnz[i] = nz;
3246   }
3247   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3248   ierr = PetscFree(nnz);CHKERRQ(ierr);
3249 
3250   values = (PetscScalar*)V;
3251   if (!values) {
3252     ierr = PetscMalloc(bs*bs*(nz_max+1)*sizeof(PetscScalar),&values);CHKERRQ(ierr);
3253     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
3254   }
3255   for (i=0; i<m; i++) {
3256     PetscInt          ncols  = ii[i+1] - ii[i];
3257     const PetscInt    *icols = jj + ii[i];
3258     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3259     ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3260   }
3261   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3262   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3263   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3264   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3265   PetscFunctionReturn(0);
3266 }
3267 EXTERN_C_END
3268 
3269 
3270 EXTERN_C_BEGIN
3271 extern PetscErrorCode  MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
3272 extern PetscErrorCode  MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
3273 #if defined(PETSC_HAVE_MUMPS)
3274 extern PetscErrorCode  MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
3275 #endif
3276 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,Mat*);
3277 EXTERN_C_END
3278 
3279 /*MC
3280    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3281    block sparse compressed row format.
3282 
3283    Options Database Keys:
3284 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3285 
3286   Level: beginner
3287 
3288 .seealso: MatCreateSeqBAIJ()
3289 M*/
3290 
3291 EXTERN_C_BEGIN
3292 extern PetscErrorCode  MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3293 EXTERN_C_END
3294 
3295 EXTERN_C_BEGIN
3296 #undef __FUNCT__
3297 #define __FUNCT__ "MatCreate_SeqBAIJ"
3298 PetscErrorCode  MatCreate_SeqBAIJ(Mat B)
3299 {
3300   PetscErrorCode ierr;
3301   PetscMPIInt    size;
3302   Mat_SeqBAIJ    *b;
3303 
3304   PetscFunctionBegin;
3305   ierr = MPI_Comm_size(((PetscObject)B)->comm,&size);CHKERRQ(ierr);
3306   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3307 
3308   ierr    = PetscNewLog(B,Mat_SeqBAIJ,&b);CHKERRQ(ierr);
3309   B->data = (void*)b;
3310   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3311   b->row                   = 0;
3312   b->col                   = 0;
3313   b->icol                  = 0;
3314   b->reallocs              = 0;
3315   b->saved_values          = 0;
3316 
3317   b->roworiented           = PETSC_TRUE;
3318   b->nonew                 = 0;
3319   b->diag                  = 0;
3320   b->solve_work            = 0;
3321   b->mult_work             = 0;
3322   B->spptr                 = 0;
3323   B->info.nz_unneeded      = (PetscReal)b->maxnz*b->bs2;
3324   b->keepnonzeropattern    = PETSC_FALSE;
3325   b->xtoy                  = 0;
3326   b->XtoY                  = 0;
3327   B->same_nonzero          = PETSC_FALSE;
3328 
3329   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactorAvailable_petsc_C",
3330                                      "MatGetFactorAvailable_seqbaij_petsc",
3331                                      MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3332   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_petsc_C",
3333                                      "MatGetFactor_seqbaij_petsc",
3334                                      MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3335   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_bstrm_C",
3336                                      "MatGetFactor_seqbaij_bstrm",
3337                                      MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3338 #if defined(PETSC_HAVE_MUMPS)
3339   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C", "MatGetFactor_baij_mumps", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3340 #endif
3341   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatInvertBlockDiagonal_C",
3342                                      "MatInvertBlockDiagonal_SeqBAIJ",
3343                                       MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3344   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
3345                                      "MatStoreValues_SeqBAIJ",
3346                                       MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3347   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
3348                                      "MatRetrieveValues_SeqBAIJ",
3349                                       MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3350   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",
3351                                      "MatSeqBAIJSetColumnIndices_SeqBAIJ",
3352                                       MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3353   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqaij_C",
3354                                      "MatConvert_SeqBAIJ_SeqAIJ",
3355                                       MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3356   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",
3357                                      "MatConvert_SeqBAIJ_SeqSBAIJ",
3358                                       MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3359   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocation_C",
3360                                      "MatSeqBAIJSetPreallocation_SeqBAIJ",
3361                                       MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3362   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",
3363                                      "MatSeqBAIJSetPreallocationCSR_SeqBAIJ",
3364                                       MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3365   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",
3366                                      "MatConvert_SeqBAIJ_SeqBSTRM",
3367                                       MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3368   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatIsTranspose_C",
3369                                      "MatIsTranspose_SeqBAIJ",
3370                                       MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3371   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3372   PetscFunctionReturn(0);
3373 }
3374 EXTERN_C_END
3375 
3376 #undef __FUNCT__
3377 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3378 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool  mallocmatspace)
3379 {
3380   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3381   PetscErrorCode ierr;
3382   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3383 
3384   PetscFunctionBegin;
3385   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3386 
3387   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3388     c->imax = a->imax;
3389     c->ilen = a->ilen;
3390     c->free_imax_ilen = PETSC_FALSE;
3391   } else {
3392     ierr = PetscMalloc2(mbs,PetscInt,&c->imax,mbs,PetscInt,&c->ilen);CHKERRQ(ierr);
3393     ierr = PetscLogObjectMemory(C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3394     for (i=0; i<mbs; i++) {
3395       c->imax[i] = a->imax[i];
3396       c->ilen[i] = a->ilen[i];
3397     }
3398     c->free_imax_ilen = PETSC_TRUE;
3399   }
3400 
3401   /* allocate the matrix space */
3402   if (mallocmatspace) {
3403     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3404       ierr = PetscMalloc(bs2*nz*sizeof(PetscScalar),&c->a);CHKERRQ(ierr);
3405       ierr = PetscLogObjectMemory(C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3406       ierr = PetscMemzero(c->a,bs2*nz*sizeof(PetscScalar));CHKERRQ(ierr);
3407       c->i            = a->i;
3408       c->j            = a->j;
3409       c->singlemalloc = PETSC_FALSE;
3410       c->free_a       = PETSC_TRUE;
3411       c->free_ij      = PETSC_FALSE;
3412       c->parent       = A;
3413       C->preallocated = PETSC_TRUE;
3414       C->assembled    = PETSC_TRUE;
3415       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3416       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3417       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3418     } else {
3419       ierr = PetscMalloc3(bs2*nz,PetscScalar,&c->a,nz,PetscInt,&c->j,mbs+1,PetscInt,&c->i);CHKERRQ(ierr);
3420       ierr = PetscLogObjectMemory(C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3421       c->singlemalloc = PETSC_TRUE;
3422       c->free_a       = PETSC_TRUE;
3423       c->free_ij      = PETSC_TRUE;
3424       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3425       if (mbs > 0) {
3426         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3427         if (cpvalues == MAT_COPY_VALUES) {
3428           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3429         } else {
3430           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3431         }
3432       }
3433       C->preallocated = PETSC_TRUE;
3434       C->assembled    = PETSC_TRUE;
3435     }
3436   }
3437 
3438   c->roworiented = a->roworiented;
3439   c->nonew       = a->nonew;
3440   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3441   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3442   c->bs2         = a->bs2;
3443   c->mbs         = a->mbs;
3444   c->nbs         = a->nbs;
3445 
3446   if (a->diag) {
3447     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3448       c->diag      = a->diag;
3449       c->free_diag = PETSC_FALSE;
3450     } else {
3451       ierr = PetscMalloc((mbs+1)*sizeof(PetscInt),&c->diag);CHKERRQ(ierr);
3452       ierr = PetscLogObjectMemory(C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3453       for (i=0; i<mbs; i++) {
3454         c->diag[i] = a->diag[i];
3455       }
3456       c->free_diag = PETSC_TRUE;
3457     }
3458   } else c->diag        = 0;
3459   c->nz                 = a->nz;
3460   c->maxnz              = a->nz; /* Since we allocate exactly the right amount */
3461   c->solve_work         = 0;
3462   c->mult_work          = 0;
3463 
3464   c->compressedrow.use     = a->compressedrow.use;
3465   c->compressedrow.nrows   = a->compressedrow.nrows;
3466   c->compressedrow.check   = a->compressedrow.check;
3467   if (a->compressedrow.use) {
3468     i = a->compressedrow.nrows;
3469     ierr = PetscMalloc2(i+1,PetscInt,&c->compressedrow.i,i+1,PetscInt,&c->compressedrow.rindex);CHKERRQ(ierr);
3470     ierr = PetscLogObjectMemory(C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3471     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3472     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3473   } else {
3474     c->compressedrow.use    = PETSC_FALSE;
3475     c->compressedrow.i      = PETSC_NULL;
3476     c->compressedrow.rindex = PETSC_NULL;
3477   }
3478   C->same_nonzero = A->same_nonzero;
3479   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3480   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3481   PetscFunctionReturn(0);
3482 }
3483 
3484 #undef __FUNCT__
3485 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3486 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3487 {
3488     PetscErrorCode ierr;
3489 
3490   PetscFunctionBegin;
3491   ierr = MatCreate(((PetscObject)A)->comm,B);CHKERRQ(ierr);
3492   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3493   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3494   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3495   PetscFunctionReturn(0);
3496 }
3497 
3498 #undef __FUNCT__
3499 #define __FUNCT__ "MatLoad_SeqBAIJ"
3500 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3501 {
3502   Mat_SeqBAIJ    *a;
3503   PetscErrorCode ierr;
3504   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3505   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3506   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3507   PetscInt       *masked,nmask,tmp,bs2,ishift;
3508   PetscMPIInt    size;
3509   int            fd;
3510   PetscScalar    *aa;
3511   MPI_Comm       comm = ((PetscObject)viewer)->comm;
3512 
3513   PetscFunctionBegin;
3514   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3515   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
3516   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3517   bs2  = bs*bs;
3518 
3519   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3520   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3521   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3522   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3523   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3524   M = header[1]; N = header[2]; nz = header[3];
3525 
3526   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3527   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3528 
3529   /*
3530      This code adds extra rows to make sure the number of rows is
3531     divisible by the blocksize
3532   */
3533   mbs        = M/bs;
3534   extra_rows = bs - M + bs*(mbs);
3535   if (extra_rows == bs) extra_rows = 0;
3536   else                  mbs++;
3537   if (extra_rows) {
3538     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3539   }
3540 
3541   /* Set global sizes if not already set */
3542   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3543     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3544   } else { /* Check if the matrix global sizes are correct */
3545     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3546     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3547       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3548     }
3549     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3550   }
3551 
3552   /* read in row lengths */
3553   ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
3554   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3555   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3556 
3557   /* read in column indices */
3558   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscInt),&jj);CHKERRQ(ierr);
3559   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3560   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3561 
3562   /* loop over row lengths determining block row lengths */
3563   ierr     = PetscMalloc(mbs*sizeof(PetscInt),&browlengths);CHKERRQ(ierr);
3564   ierr     = PetscMemzero(browlengths,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3565   ierr     = PetscMalloc2(mbs,PetscInt,&mask,mbs,PetscInt,&masked);CHKERRQ(ierr);
3566   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3567   rowcount = 0;
3568   nzcount = 0;
3569   for (i=0; i<mbs; i++) {
3570     nmask = 0;
3571     for (j=0; j<bs; j++) {
3572       kmax = rowlengths[rowcount];
3573       for (k=0; k<kmax; k++) {
3574         tmp = jj[nzcount++]/bs;
3575         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3576       }
3577       rowcount++;
3578     }
3579     browlengths[i] += nmask;
3580     /* zero out the mask elements we set */
3581     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3582   }
3583 
3584   /* Do preallocation  */
3585   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3586   a = (Mat_SeqBAIJ*)newmat->data;
3587 
3588   /* set matrix "i" values */
3589   a->i[0] = 0;
3590   for (i=1; i<= mbs; i++) {
3591     a->i[i]      = a->i[i-1] + browlengths[i-1];
3592     a->ilen[i-1] = browlengths[i-1];
3593   }
3594   a->nz         = 0;
3595   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3596 
3597   /* read in nonzero values */
3598   ierr = PetscMalloc((nz+extra_rows)*sizeof(PetscScalar),&aa);CHKERRQ(ierr);
3599   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3600   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3601 
3602   /* set "a" and "j" values into matrix */
3603   nzcount = 0; jcount = 0;
3604   for (i=0; i<mbs; i++) {
3605     nzcountb = nzcount;
3606     nmask    = 0;
3607     for (j=0; j<bs; j++) {
3608       kmax = rowlengths[i*bs+j];
3609       for (k=0; k<kmax; k++) {
3610         tmp = jj[nzcount++]/bs;
3611         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3612       }
3613     }
3614     /* sort the masked values */
3615     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3616 
3617     /* set "j" values into matrix */
3618     maskcount = 1;
3619     for (j=0; j<nmask; j++) {
3620       a->j[jcount++]  = masked[j];
3621       mask[masked[j]] = maskcount++;
3622     }
3623     /* set "a" values into matrix */
3624     ishift = bs2*a->i[i];
3625     for (j=0; j<bs; j++) {
3626       kmax = rowlengths[i*bs+j];
3627       for (k=0; k<kmax; k++) {
3628         tmp       = jj[nzcountb]/bs ;
3629         block     = mask[tmp] - 1;
3630         point     = jj[nzcountb] - bs*tmp;
3631         idx       = ishift + bs2*block + j + bs*point;
3632         a->a[idx] = (MatScalar)aa[nzcountb++];
3633       }
3634     }
3635     /* zero out the mask elements we set */
3636     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3637   }
3638   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3639 
3640   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3641   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3642   ierr = PetscFree(aa);CHKERRQ(ierr);
3643   ierr = PetscFree(jj);CHKERRQ(ierr);
3644   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3645 
3646   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3647   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3648   PetscFunctionReturn(0);
3649 }
3650 
3651 #undef __FUNCT__
3652 #define __FUNCT__ "MatCreateSeqBAIJ"
3653 /*@C
3654    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3655    compressed row) format.  For good matrix assembly performance the
3656    user should preallocate the matrix storage by setting the parameter nz
3657    (or the array nnz).  By setting these parameters accurately, performance
3658    during matrix assembly can be increased by more than a factor of 50.
3659 
3660    Collective on MPI_Comm
3661 
3662    Input Parameters:
3663 +  comm - MPI communicator, set to PETSC_COMM_SELF
3664 .  bs - size of block
3665 .  m - number of rows
3666 .  n - number of columns
3667 .  nz - number of nonzero blocks  per block row (same for all rows)
3668 -  nnz - array containing the number of nonzero blocks in the various block rows
3669          (possibly different for each block row) or PETSC_NULL
3670 
3671    Output Parameter:
3672 .  A - the matrix
3673 
3674    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3675    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3676    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3677 
3678    Options Database Keys:
3679 .   -mat_no_unroll - uses code that does not unroll the loops in the
3680                      block calculations (much slower)
3681 .    -mat_block_size - size of the blocks to use
3682 
3683    Level: intermediate
3684 
3685    Notes:
3686    The number of rows and columns must be divisible by blocksize.
3687 
3688    If the nnz parameter is given then the nz parameter is ignored
3689 
3690    A nonzero block is any block that as 1 or more nonzeros in it
3691 
3692    The block AIJ format is fully compatible with standard Fortran 77
3693    storage.  That is, the stored row and column indices can begin at
3694    either one (as in Fortran) or zero.  See the users' manual for details.
3695 
3696    Specify the preallocated storage with either nz or nnz (not both).
3697    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3698    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3699    matrices.
3700 
3701 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3702 @*/
3703 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3704 {
3705   PetscErrorCode ierr;
3706 
3707   PetscFunctionBegin;
3708   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3709   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3710   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3711   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3712   PetscFunctionReturn(0);
3713 }
3714 
3715 #undef __FUNCT__
3716 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3717 /*@C
3718    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3719    per row in the matrix. For good matrix assembly performance the
3720    user should preallocate the matrix storage by setting the parameter nz
3721    (or the array nnz).  By setting these parameters accurately, performance
3722    during matrix assembly can be increased by more than a factor of 50.
3723 
3724    Collective on MPI_Comm
3725 
3726    Input Parameters:
3727 +  A - the matrix
3728 .  bs - size of block
3729 .  nz - number of block nonzeros per block row (same for all rows)
3730 -  nnz - array containing the number of block nonzeros in the various block rows
3731          (possibly different for each block row) or PETSC_NULL
3732 
3733    Options Database Keys:
3734 .   -mat_no_unroll - uses code that does not unroll the loops in the
3735                      block calculations (much slower)
3736 .    -mat_block_size - size of the blocks to use
3737 
3738    Level: intermediate
3739 
3740    Notes:
3741    If the nnz parameter is given then the nz parameter is ignored
3742 
3743    You can call MatGetInfo() to get information on how effective the preallocation was;
3744    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3745    You can also run with the option -info and look for messages with the string
3746    malloc in them to see if additional memory allocation was needed.
3747 
3748    The block AIJ format is fully compatible with standard Fortran 77
3749    storage.  That is, the stored row and column indices can begin at
3750    either one (as in Fortran) or zero.  See the users' manual for details.
3751 
3752    Specify the preallocated storage with either nz or nnz (not both).
3753    Set nz=PETSC_DEFAULT and nnz=PETSC_NULL for PETSc to control dynamic memory
3754    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3755 
3756 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3757 @*/
3758 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3759 {
3760   PetscErrorCode ierr;
3761 
3762   PetscFunctionBegin;
3763   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3764   PetscValidType(B,1);
3765   PetscValidLogicalCollectiveInt(B,bs,2);
3766   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3767   PetscFunctionReturn(0);
3768 }
3769 
3770 #undef __FUNCT__
3771 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3772 /*@C
3773    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3774    (the default sequential PETSc format).
3775 
3776    Collective on MPI_Comm
3777 
3778    Input Parameters:
3779 +  A - the matrix
3780 .  i - the indices into j for the start of each local row (starts with zero)
3781 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3782 -  v - optional values in the matrix
3783 
3784    Level: developer
3785 
3786 .keywords: matrix, aij, compressed row, sparse
3787 
3788 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3789 @*/
3790 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3791 {
3792   PetscErrorCode ierr;
3793 
3794   PetscFunctionBegin;
3795   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3796   PetscValidType(B,1);
3797   PetscValidLogicalCollectiveInt(B,bs,2);
3798   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3799   PetscFunctionReturn(0);
3800 }
3801 
3802 
3803 #undef __FUNCT__
3804 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3805 /*@
3806      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3807 
3808      Collective on MPI_Comm
3809 
3810    Input Parameters:
3811 +  comm - must be an MPI communicator of size 1
3812 .  bs - size of block
3813 .  m - number of rows
3814 .  n - number of columns
3815 .  i - row indices
3816 .  j - column indices
3817 -  a - matrix values
3818 
3819    Output Parameter:
3820 .  mat - the matrix
3821 
3822    Level: advanced
3823 
3824    Notes:
3825        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3826     once the matrix is destroyed
3827 
3828        You cannot set new nonzero locations into this matrix, that will generate an error.
3829 
3830        The i and j indices are 0 based
3831 
3832        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3833 
3834 
3835 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3836 
3837 @*/
3838 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt* i,PetscInt*j,PetscScalar *a,Mat *mat)
3839 {
3840   PetscErrorCode ierr;
3841   PetscInt       ii;
3842   Mat_SeqBAIJ    *baij;
3843 
3844   PetscFunctionBegin;
3845   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3846   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3847 
3848   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3849   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3850   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3851   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3852   baij = (Mat_SeqBAIJ*)(*mat)->data;
3853   ierr = PetscMalloc2(m,PetscInt,&baij->imax,m,PetscInt,&baij->ilen);CHKERRQ(ierr);
3854   ierr = PetscLogObjectMemory(*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3855 
3856   baij->i = i;
3857   baij->j = j;
3858   baij->a = a;
3859   baij->singlemalloc = PETSC_FALSE;
3860   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3861   baij->free_a       = PETSC_FALSE;
3862   baij->free_ij      = PETSC_FALSE;
3863 
3864   for (ii=0; ii<m; ii++) {
3865     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3866 #if defined(PETSC_USE_DEBUG)
3867     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3868 #endif
3869   }
3870 #if defined(PETSC_USE_DEBUG)
3871   for (ii=0; ii<baij->i[m]; ii++) {
3872     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3873     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3874   }
3875 #endif
3876 
3877   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3878   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3879   PetscFunctionReturn(0);
3880 }
3881