xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 4cefc2ff75e4fd40cdd0827272dcde5d684e9831)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
156   }
157   if (!a->sor_workt) {
158     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
159   }
160   if (!a->sor_work) {
161     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
162   }
163   work = a->mult_work;
164   t    = a->sor_workt;
165   w    = a->sor_work;
166 
167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
168   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
169 
170   if (flag & SOR_ZERO_INITIAL_GUESS) {
171     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
172       switch (bs) {
173       case 1:
174         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
175         t[0] = b[0];
176         i2     = 1;
177         idiag += 1;
178         for (i=1; i<m; i++) {
179           v  = aa + ai[i];
180           vi = aj + ai[i];
181           nz = diag[i] - ai[i];
182           s[0] = b[i2];
183           for (j=0; j<nz; j++) {
184             xw[0] = x[vi[j]];
185             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
186           }
187           t[i2] = s[0];
188           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
189           x[i2]  = xw[0];
190           idiag += 1;
191           i2    += 1;
192         }
193         break;
194       case 2:
195         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
196         t[0] = b[0]; t[1] = b[1];
197         i2     = 2;
198         idiag += 4;
199         for (i=1; i<m; i++) {
200           v  = aa + 4*ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2]; s[1] = b[i2+1];
204           for (j=0; j<nz; j++) {
205             idx = 2*vi[j];
206             it  = 4*j;
207             xw[0] = x[idx]; xw[1] = x[1+idx];
208             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
209           }
210           t[i2] = s[0]; t[i2+1] = s[1];
211           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
212           x[i2]   = xw[0]; x[i2+1] = xw[1];
213           idiag  += 4;
214           i2     += 2;
215         }
216         break;
217       case 3:
218         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
219         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
220         i2     = 3;
221         idiag += 9;
222         for (i=1; i<m; i++) {
223           v  = aa + 9*ai[i];
224           vi = aj + ai[i];
225           nz = diag[i] - ai[i];
226           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
227           while (nz--) {
228             idx = 3*(*vi++);
229             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
230             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
231             v  += 9;
232           }
233           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
234           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
235           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
236           idiag  += 9;
237           i2     += 3;
238         }
239         break;
240       case 4:
241         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
242         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
243         i2     = 4;
244         idiag += 16;
245         for (i=1; i<m; i++) {
246           v  = aa + 16*ai[i];
247           vi = aj + ai[i];
248           nz = diag[i] - ai[i];
249           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
250           while (nz--) {
251             idx = 4*(*vi++);
252             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
253             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
254             v  += 16;
255           }
256           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
257           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
258           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
259           idiag  += 16;
260           i2     += 4;
261         }
262         break;
263       case 5:
264         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
266         i2     = 5;
267         idiag += 25;
268         for (i=1; i<m; i++) {
269           v  = aa + 25*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
273           while (nz--) {
274             idx = 5*(*vi++);
275             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
276             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
277             v  += 25;
278           }
279           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
280           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
281           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
282           idiag  += 25;
283           i2     += 5;
284         }
285         break;
286       case 6:
287         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
289         i2     = 6;
290         idiag += 36;
291         for (i=1; i<m; i++) {
292           v  = aa + 36*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
296           while (nz--) {
297             idx = 6*(*vi++);
298             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
300             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
301             v  += 36;
302           }
303           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
304           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
305           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
307           idiag  += 36;
308           i2     += 6;
309         }
310         break;
311       case 7:
312         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
314         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
315         i2     = 7;
316         idiag += 49;
317         for (i=1; i<m; i++) {
318           v  = aa + 49*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
322           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
323           while (nz--) {
324             idx = 7*(*vi++);
325             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
326             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
327             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
328             v  += 49;
329           }
330           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
331           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
332           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
333           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
334           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
335           idiag  += 49;
336           i2     += 7;
337         }
338         break;
339       default:
340         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
341         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
342         i2     = bs;
343         idiag += bs2;
344         for (i=1; i<m; i++) {
345           v  = aa + bs2*ai[i];
346           vi = aj + ai[i];
347           nz = diag[i] - ai[i];
348 
349           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
350           /* copy all rows of x that are needed into contiguous space */
351           workt = work;
352           for (j=0; j<nz; j++) {
353             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
354             workt += bs;
355           }
356           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
357           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
358           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
359 
360           idiag += bs2;
361           i2    += bs;
362         }
363         break;
364       }
365       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
366       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
367       xb = t;
368     }
369     else xb = b;
370     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
371       idiag = a->idiag+bs2*(a->mbs-1);
372       i2 = bs * (m-1);
373       switch (bs) {
374       case 1:
375         s[0]  = xb[i2];
376         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
377         x[i2] = xw[0];
378         i2   -= 1;
379         for (i=m-2; i>=0; i--) {
380           v  = aa + (diag[i]+1);
381           vi = aj + diag[i] + 1;
382           nz = ai[i+1] - diag[i] - 1;
383           s[0] = xb[i2];
384           for (j=0; j<nz; j++) {
385             xw[0] = x[vi[j]];
386             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
387           }
388           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
389           x[i2]  = xw[0];
390           idiag -= 1;
391           i2    -= 1;
392         }
393         break;
394       case 2:
395         s[0]  = xb[i2]; s[1] = xb[i2+1];
396         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
397         x[i2] = xw[0]; x[i2+1] = xw[1];
398         i2    -= 2;
399         idiag -= 4;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + 4*(diag[i] + 1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2]; s[1] = xb[i2+1];
405           for (j=0; j<nz; j++) {
406             idx = 2*vi[j];
407             it  = 4*j;
408             xw[0] = x[idx]; xw[1] = x[1+idx];
409             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
410           }
411           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
412           x[i2]   = xw[0]; x[i2+1] = xw[1];
413           idiag  -= 4;
414           i2     -= 2;
415         }
416         break;
417       case 3:
418         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
419         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
420         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
421         i2    -= 3;
422         idiag -= 9;
423         for (i=m-2; i>=0; i--) {
424           v  = aa + 9*(diag[i]+1);
425           vi = aj + diag[i] + 1;
426           nz = ai[i+1] - diag[i] - 1;
427           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
428           while (nz--) {
429             idx = 3*(*vi++);
430             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
431             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
432             v  += 9;
433           }
434           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
435           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
436           idiag  -= 9;
437           i2     -= 3;
438         }
439         break;
440       case 4:
441         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
442         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
443         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
444         i2    -= 4;
445         idiag -= 16;
446         for (i=m-2; i>=0; i--) {
447           v  = aa + 16*(diag[i]+1);
448           vi = aj + diag[i] + 1;
449           nz = ai[i+1] - diag[i] - 1;
450           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
451           while (nz--) {
452             idx = 4*(*vi++);
453             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
454             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
455             v  += 16;
456           }
457           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
458           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
459           idiag  -= 16;
460           i2     -= 4;
461         }
462         break;
463       case 5:
464         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
465         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
467         i2    -= 5;
468         idiag -= 25;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 25*(diag[i]+1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
474           while (nz--) {
475             idx = 5*(*vi++);
476             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
477             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
478             v  += 25;
479           }
480           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
481           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
482           idiag  -= 25;
483           i2     -= 5;
484         }
485         break;
486       case 6:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
488         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
490         i2    -= 6;
491         idiag -= 36;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 36*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
497           while (nz--) {
498             idx = 6*(*vi++);
499             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
501             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
502             v  += 36;
503           }
504           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
505           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
506           idiag  -= 36;
507           i2     -= 6;
508         }
509         break;
510       case 7:
511         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
512         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
513         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
514         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
515         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
516         i2    -= 7;
517         idiag -= 49;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 49*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
523           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
524           while (nz--) {
525             idx = 7*(*vi++);
526             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
527             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
528             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
529             v  += 49;
530           }
531           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
532           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
533           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
534           idiag  -= 49;
535           i2     -= 7;
536         }
537         break;
538       default:
539         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
540         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
541         i2    -= bs;
542         idiag -= bs2;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + bs2*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547 
548           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
549           /* copy all rows of x that are needed into contiguous space */
550           workt = work;
551           for (j=0; j<nz; j++) {
552             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
553             workt += bs;
554           }
555           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
556           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
557 
558           idiag -= bs2;
559           i2    -= bs;
560         }
561         break;
562       }
563       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
564     }
565     its--;
566   }
567   while (its--) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       idiag = a->idiag;
570       i2 = 0;
571       switch (bs) {
572       case 1:
573         for (i=0; i<m; i++) {
574           v  = aa + ai[i];
575           vi = aj + ai[i];
576           nz = ai[i+1] - ai[i];
577           s[0] = b[i2];
578           for (j=0; j<nz; j++) {
579             xw[0] = x[vi[j]];
580             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
581           }
582           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
583           x[i2] += xw[0];
584           idiag += 1;
585           i2    += 1;
586         }
587         break;
588       case 2:
589         for (i=0; i<m; i++) {
590           v  = aa + 4*ai[i];
591           vi = aj + ai[i];
592           nz = ai[i+1] - ai[i];
593           s[0] = b[i2]; s[1] = b[i2+1];
594           for (j=0; j<nz; j++) {
595             idx = 2*vi[j];
596             it  = 4*j;
597             xw[0] = x[idx]; xw[1] = x[1+idx];
598             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
599           }
600           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
601           x[i2]  += xw[0]; x[i2+1] += xw[1];
602           idiag  += 4;
603           i2     += 2;
604         }
605         break;
606       case 3:
607         for (i=0; i<m; i++) {
608           v  = aa + 9*ai[i];
609           vi = aj + ai[i];
610           nz = ai[i+1] - ai[i];
611           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
612           while (nz--) {
613             idx = 3*(*vi++);
614             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
615             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
616             v  += 9;
617           }
618           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
619           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
620           idiag  += 9;
621           i2     += 3;
622         }
623         break;
624       case 4:
625         for (i=0; i<m; i++) {
626           v  = aa + 16*ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
630           while (nz--) {
631             idx = 4*(*vi++);
632             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
633             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
634             v  += 16;
635           }
636           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
637           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
638           idiag  += 16;
639           i2     += 4;
640         }
641         break;
642       case 5:
643         for (i=0; i<m; i++) {
644           v  = aa + 25*ai[i];
645           vi = aj + ai[i];
646           nz = ai[i+1] - ai[i];
647           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
648           while (nz--) {
649             idx = 5*(*vi++);
650             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
651             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
652             v  += 25;
653           }
654           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
655           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
656           idiag  += 25;
657           i2     += 5;
658         }
659         break;
660       case 6:
661         for (i=0; i<m; i++) {
662           v  = aa + 36*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
666           while (nz--) {
667             idx = 6*(*vi++);
668             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
669             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
671             v  += 36;
672           }
673           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
675           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
676           idiag  += 36;
677           i2     += 6;
678         }
679         break;
680       case 7:
681         for (i=0; i<m; i++) {
682           v  = aa + 49*ai[i];
683           vi = aj + ai[i];
684           nz = ai[i+1] - ai[i];
685           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
686           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
687           while (nz--) {
688             idx = 7*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
692             v  += 49;
693           }
694           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
695           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
697           idiag  += 49;
698           i2     += 7;
699         }
700         break;
701       default:
702         for (i=0; i<m; i++) {
703           v  = aa + bs2*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706 
707           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
708           /* copy all rows of x that are needed into contiguous space */
709           workt = work;
710           for (j=0; j<nz; j++) {
711             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
712             workt += bs;
713           }
714           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
715           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
716 
717           idiag += bs2;
718           i2    += bs;
719         }
720         break;
721       }
722       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
723     }
724     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
725       idiag = a->idiag+bs2*(a->mbs-1);
726       i2 = bs * (m-1);
727       switch (bs) {
728       case 1:
729         for (i=m-1; i>=0; i--) {
730           v  = aa + ai[i];
731           vi = aj + ai[i];
732           nz = ai[i+1] - ai[i];
733           s[0] = b[i2];
734           for (j=0; j<nz; j++) {
735             xw[0] = x[vi[j]];
736             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
737           }
738           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
739           x[i2] += xw[0];
740           idiag -= 1;
741           i2    -= 1;
742         }
743         break;
744       case 2:
745         for (i=m-1; i>=0; i--) {
746           v  = aa + 4*ai[i];
747           vi = aj + ai[i];
748           nz = ai[i+1] - ai[i];
749           s[0] = b[i2]; s[1] = b[i2+1];
750           for (j=0; j<nz; j++) {
751             idx = 2*vi[j];
752             it  = 4*j;
753             xw[0] = x[idx]; xw[1] = x[1+idx];
754             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
755           }
756           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
757           x[i2]  += xw[0]; x[i2+1] += xw[1];
758           idiag  -= 4;
759           i2     -= 2;
760         }
761         break;
762       case 3:
763         for (i=m-1; i>=0; i--) {
764           v  = aa + 9*ai[i];
765           vi = aj + ai[i];
766           nz = ai[i+1] - ai[i];
767           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
768           while (nz--) {
769             idx = 3*(*vi++);
770             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
771             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
772             v  += 9;
773           }
774           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
775           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
776           idiag  -= 9;
777           i2     -= 3;
778         }
779         break;
780       case 4:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + 16*ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
786           while (nz--) {
787             idx = 4*(*vi++);
788             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
789             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
790             v  += 16;
791           }
792           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
793           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
794           idiag  -= 16;
795           i2     -= 4;
796         }
797         break;
798       case 5:
799         for (i=m-1; i>=0; i--) {
800           v  = aa + 25*ai[i];
801           vi = aj + ai[i];
802           nz = ai[i+1] - ai[i];
803           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
804           while (nz--) {
805             idx = 5*(*vi++);
806             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
807             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
808             v  += 25;
809           }
810           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
811           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
812           idiag  -= 25;
813           i2     -= 5;
814         }
815         break;
816       case 6:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 36*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
822           while (nz--) {
823             idx = 6*(*vi++);
824             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
825             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
827             v  += 36;
828           }
829           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
831           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
832           idiag  -= 36;
833           i2     -= 6;
834         }
835         break;
836       case 7:
837         for (i=m-1; i>=0; i--) {
838           v  = aa + 49*ai[i];
839           vi = aj + ai[i];
840           nz = ai[i+1] - ai[i];
841           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
842           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
843           while (nz--) {
844             idx = 7*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
848             v  += 49;
849           }
850           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
851           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
853           idiag  -= 49;
854           i2     -= 7;
855         }
856         break;
857       default:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + bs2*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862 
863           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           /* copy all rows of x that are needed into contiguous space */
865           workt = work;
866           for (j=0; j<nz; j++) {
867             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
868             workt += bs;
869           }
870           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
871           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
872 
873           idiag -= bs2;
874           i2    -= bs;
875         }
876         break;
877       }
878       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
879     }
880   }
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 
887 /*
888     Special version for direct calls from Fortran (Used in PETSc-fun3d)
889 */
890 #if defined(PETSC_HAVE_FORTRAN_CAPS)
891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
893 #define matsetvaluesblocked4_ matsetvaluesblocked4
894 #endif
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "matsetvaluesblocked4_"
898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
899 {
900   Mat               A  = *AA;
901   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
902   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
903   PetscInt          *ai    =a->i,*ailen=a->ilen;
904   PetscInt          *aj    =a->j,stepval,lastcol = -1;
905   const PetscScalar *value = v;
906   MatScalar         *ap,*aa = a->a,*bap;
907 
908   PetscFunctionBegin;
909   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
910   stepval = (n-1)*4;
911   for (k=0; k<m; k++) { /* loop over added rows */
912     row  = im[k];
913     rp   = aj + ai[row];
914     ap   = aa + 16*ai[row];
915     nrow = ailen[row];
916     low  = 0;
917     high = nrow;
918     for (l=0; l<n; l++) { /* loop over added columns */
919       col = in[l];
920       if (col <= lastcol)  low = 0;
921       else                high = nrow;
922       lastcol = col;
923       value   = v + k*(stepval+4 + l)*4;
924       while (high-low > 7) {
925         t = (low+high)/2;
926         if (rp[t] > col) high = t;
927         else             low  = t;
928       }
929       for (i=low; i<high; i++) {
930         if (rp[i] > col) break;
931         if (rp[i] == col) {
932           bap = ap +  16*i;
933           for (ii=0; ii<4; ii++,value+=stepval) {
934             for (jj=ii; jj<16; jj+=4) {
935               bap[jj] += *value++;
936             }
937           }
938           goto noinsert2;
939         }
940       }
941       N = nrow++ - 1;
942       high++; /* added new column index thus must search to one higher than before */
943       /* shift up all the later entries in this row */
944       for (ii=N; ii>=i; ii--) {
945         rp[ii+1] = rp[ii];
946         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
947       }
948       if (N >= i) {
949         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
950       }
951       rp[i] = col;
952       bap   = ap +  16*i;
953       for (ii=0; ii<4; ii++,value+=stepval) {
954         for (jj=ii; jj<16; jj+=4) {
955           bap[jj] = *value++;
956         }
957       }
958       noinsert2:;
959       low = i;
960     }
961     ailen[row] = nrow;
962   }
963   PetscFunctionReturnVoid();
964 }
965 
966 #if defined(PETSC_HAVE_FORTRAN_CAPS)
967 #define matsetvalues4_ MATSETVALUES4
968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
969 #define matsetvalues4_ matsetvalues4
970 #endif
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatSetValues4_"
974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
975 {
976   Mat         A  = *AA;
977   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
979   PetscInt    *ai=a->i,*ailen=a->ilen;
980   PetscInt    *aj=a->j,brow,bcol;
981   PetscInt    ridx,cidx,lastcol = -1;
982   MatScalar   *ap,value,*aa=a->a,*bap;
983 
984   PetscFunctionBegin;
985   for (k=0; k<m; k++) { /* loop over added rows */
986     row  = im[k]; brow = row/4;
987     rp   = aj + ai[brow];
988     ap   = aa + 16*ai[brow];
989     nrow = ailen[brow];
990     low  = 0;
991     high = nrow;
992     for (l=0; l<n; l++) { /* loop over added columns */
993       col   = in[l]; bcol = col/4;
994       ridx  = row % 4; cidx = col % 4;
995       value = v[l + k*n];
996       if (col <= lastcol)  low = 0;
997       else                high = nrow;
998       lastcol = col;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap   = ap +  16*i + 4*cidx + ridx;
1008           *bap += value;
1009           goto noinsert1;
1010         }
1011       }
1012       N = nrow++ - 1;
1013       high++; /* added new column thus must search to one higher than before */
1014       /* shift up all the later entries in this row */
1015       for (ii=N; ii>=i; ii--) {
1016         rp[ii+1] = rp[ii];
1017         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1018       }
1019       if (N>=i) {
1020         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1021       }
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1067 {
1068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscInt       i,j,m = a->mbs;
1071 
1072   PetscFunctionBegin;
1073   if (!a->diag) {
1074     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1075     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1076     a->free_diag = PETSC_TRUE;
1077   }
1078   for (i=0; i<m; i++) {
1079     a->diag[i] = a->i[i+1];
1080     for (j=a->i[i]; j<a->i[i+1]; j++) {
1081       if (a->j[j] == i) {
1082         a->diag[i] = j;
1083         break;
1084       }
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1094 {
1095   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1098   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1099 
1100   PetscFunctionBegin;
1101   *nn = n;
1102   if (!ia) PetscFunctionReturn(0);
1103   if (symmetric) {
1104     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1105     nz   = tia[n];
1106   } else {
1107     tia = a->i; tja = a->j;
1108   }
1109 
1110   if (!blockcompressed && bs > 1) {
1111     (*nn) *= bs;
1112     /* malloc & create the natural set of indices */
1113     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1114     if (n) {
1115       (*ia)[0] = 0;
1116       for (j=1; j<bs; j++) {
1117         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1118       }
1119     }
1120 
1121     for (i=1; i<n; i++) {
1122       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1123       for (j=1; j<bs; j++) {
1124         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1125       }
1126     }
1127     if (n) {
1128       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1129     }
1130 
1131     if (inja) {
1132       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1133       cnt = 0;
1134       for (i=0; i<n; i++) {
1135         for (j=0; j<bs; j++) {
1136           for (k=tia[i]; k<tia[i+1]; k++) {
1137             for (l=0; l<bs; l++) {
1138               (*ja)[cnt++] = bs*tja[k] + l;
1139             }
1140           }
1141         }
1142       }
1143     }
1144 
1145     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1146       ierr = PetscFree(tia);CHKERRQ(ierr);
1147       ierr = PetscFree(tja);CHKERRQ(ierr);
1148     }
1149   } else if (oshift == 1) {
1150     if (symmetric) {
1151       nz = tia[A->rmap->n/bs];
1152       /*  add 1 to i and j indices */
1153       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1154       *ia = tia;
1155       if (ja) {
1156         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1157         *ja = tja;
1158       }
1159     } else {
1160       nz = a->i[A->rmap->n/bs];
1161       /* malloc space and  add 1 to i and j indices */
1162       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1163       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1164       if (ja) {
1165         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1166         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1167       }
1168     }
1169   } else {
1170     *ia = tia;
1171     if (ja) *ja = tja;
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1179 {
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   if (!ia) PetscFunctionReturn(0);
1184   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1185     ierr = PetscFree(*ia);CHKERRQ(ierr);
1186     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1194 {
1195   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199 #if defined(PETSC_USE_LOG)
1200   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1201 #endif
1202   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1205   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1206   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1207   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1208   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1209   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1210   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1211   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1212   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1213   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1214   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1215   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1216 
1217   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1218   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1219   ierr = PetscFree(A->data);CHKERRQ(ierr);
1220 
1221   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1231   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1237 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1238 {
1239   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   switch (op) {
1244   case MAT_ROW_ORIENTED:
1245     a->roworiented = flg;
1246     break;
1247   case MAT_KEEP_NONZERO_PATTERN:
1248     a->keepnonzeropattern = flg;
1249     break;
1250   case MAT_NEW_NONZERO_LOCATIONS:
1251     a->nonew = (flg ? 0 : 1);
1252     break;
1253   case MAT_NEW_NONZERO_LOCATION_ERR:
1254     a->nonew = (flg ? -1 : 0);
1255     break;
1256   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1257     a->nonew = (flg ? -2 : 0);
1258     break;
1259   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1260     a->nounused = (flg ? -1 : 0);
1261     break;
1262   case MAT_NEW_DIAGONALS:
1263   case MAT_IGNORE_OFF_PROC_ENTRIES:
1264   case MAT_USE_HASH_TABLE:
1265     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1266     break;
1267   case MAT_SPD:
1268   case MAT_SYMMETRIC:
1269   case MAT_STRUCTURALLY_SYMMETRIC:
1270   case MAT_HERMITIAN:
1271   case MAT_SYMMETRY_ETERNAL:
1272     /* These options are handled directly by MatSetOption() */
1273     break;
1274   default:
1275     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281 #undef __FUNCT__
1282 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1283 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1284 {
1285   PetscErrorCode ierr;
1286   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1287   MatScalar      *aa_i;
1288   PetscScalar    *v_i;
1289 
1290   PetscFunctionBegin;
1291   bs  = A->rmap->bs;
1292   bs2 = bs*bs;
1293   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1294 
1295   bn  = row/bs;   /* Block number */
1296   bp  = row % bs; /* Block Position */
1297   M   = ai[bn+1] - ai[bn];
1298   *nz = bs*M;
1299 
1300   if (v) {
1301     *v = 0;
1302     if (*nz) {
1303       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1304       for (i=0; i<M; i++) { /* for each block in the block row */
1305         v_i  = *v + i*bs;
1306         aa_i = aa + bs2*(ai[bn] + i);
1307         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308       }
1309     }
1310   }
1311 
1312   if (idx) {
1313     *idx = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         idx_i = *idx + i*bs;
1318         itmp  = bs*aj[ai[bn] + i];
1319         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320       }
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1328 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1340 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1341 {
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
1345   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1346   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1354 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1355 {
1356   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1357   Mat            C;
1358   PetscErrorCode ierr;
1359   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1360   PetscInt       *rows,*cols,bs2=a->bs2;
1361   MatScalar      *array;
1362 
1363   PetscFunctionBegin;
1364   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1365   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1366     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1367 
1368     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1369     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1370     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1371     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1372     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1373     ierr = PetscFree(col);CHKERRQ(ierr);
1374   } else {
1375     C = *B;
1376   }
1377 
1378   array = a->a;
1379   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1380   for (i=0; i<mbs; i++) {
1381     cols[0] = i*bs;
1382     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1383     len = ai[i+1] - ai[i];
1384     for (j=0; j<len; j++) {
1385       rows[0] = (*aj++)*bs;
1386       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1387       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1388       array += bs2;
1389     }
1390   }
1391   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1392 
1393   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1395 
1396   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1397     *B = C;
1398   } else {
1399     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407 {
1408   PetscErrorCode ierr;
1409   Mat            Btrans;
1410 
1411   PetscFunctionBegin;
1412   *f   = PETSC_FALSE;
1413   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1414   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1415   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1421 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1422 {
1423   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1424   PetscErrorCode ierr;
1425   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1426   int            fd;
1427   PetscScalar    *aa;
1428   FILE           *file;
1429 
1430   PetscFunctionBegin;
1431   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1432   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1433   col_lens[0] = MAT_FILE_CLASSID;
1434 
1435   col_lens[1] = A->rmap->N;
1436   col_lens[2] = A->cmap->n;
1437   col_lens[3] = a->nz*bs2;
1438 
1439   /* store lengths of each row and write (including header) to file */
1440   count = 0;
1441   for (i=0; i<a->mbs; i++) {
1442     for (j=0; j<bs; j++) {
1443       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1444     }
1445   }
1446   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1447   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1448 
1449   /* store column indices (zero start index) */
1450   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1451   count = 0;
1452   for (i=0; i<a->mbs; i++) {
1453     for (j=0; j<bs; j++) {
1454       for (k=a->i[i]; k<a->i[i+1]; k++) {
1455         for (l=0; l<bs; l++) {
1456           jj[count++] = bs*a->j[k] + l;
1457         }
1458       }
1459     }
1460   }
1461   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1462   ierr = PetscFree(jj);CHKERRQ(ierr);
1463 
1464   /* store nonzero values */
1465   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1466   count = 0;
1467   for (i=0; i<a->mbs; i++) {
1468     for (j=0; j<bs; j++) {
1469       for (k=a->i[i]; k<a->i[i+1]; k++) {
1470         for (l=0; l<bs; l++) {
1471           aa[count++] = a->a[bs2*k + l*bs + j];
1472         }
1473       }
1474     }
1475   }
1476   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1477   ierr = PetscFree(aa);CHKERRQ(ierr);
1478 
1479   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1480   if (file) {
1481     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1482   }
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 #undef __FUNCT__
1487 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1488 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1489 {
1490   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1491   PetscErrorCode    ierr;
1492   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1493   PetscViewerFormat format;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1497   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1498     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1499   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1500     Mat aij;
1501     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1502     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1503     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1504   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1505       PetscFunctionReturn(0);
1506   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1507     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1508     for (i=0; i<a->mbs; i++) {
1509       for (j=0; j<bs; j++) {
1510         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1511         for (k=a->i[i]; k<a->i[i+1]; k++) {
1512           for (l=0; l<bs; l++) {
1513 #if defined(PETSC_USE_COMPLEX)
1514             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1515               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1516                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1517             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1518               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1519                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1520             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1522             }
1523 #else
1524             if (a->a[bs2*k + l*bs + j] != 0.0) {
1525               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1526             }
1527 #endif
1528           }
1529         }
1530         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1531       }
1532     }
1533     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1534   } else {
1535     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1536     for (i=0; i<a->mbs; i++) {
1537       for (j=0; j<bs; j++) {
1538         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1539         for (k=a->i[i]; k<a->i[i+1]; k++) {
1540           for (l=0; l<bs; l++) {
1541 #if defined(PETSC_USE_COMPLEX)
1542             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1543               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1544                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1547                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1548             } else {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1550             }
1551 #else
1552             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1553 #endif
1554           }
1555         }
1556         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1557       }
1558     }
1559     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1560   }
1561   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1562   PetscFunctionReturn(0);
1563 }
1564 
1565 #include <petscdraw.h>
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1568 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1569 {
1570   Mat               A = (Mat) Aa;
1571   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1572   PetscErrorCode    ierr;
1573   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1574   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1575   MatScalar         *aa;
1576   PetscViewer       viewer;
1577   PetscViewerFormat format;
1578 
1579   PetscFunctionBegin;
1580   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1581   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1582 
1583   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1584 
1585   /* loop over matrix elements drawing boxes */
1586 
1587   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1588     color = PETSC_DRAW_BLUE;
1589     for (i=0,row=0; i<mbs; i++,row+=bs) {
1590       for (j=a->i[i]; j<a->i[i+1]; j++) {
1591         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1592         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1593         aa  = a->a + j*bs2;
1594         for (k=0; k<bs; k++) {
1595           for (l=0; l<bs; l++) {
1596             if (PetscRealPart(*aa++) >=  0.) continue;
1597             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1598           }
1599         }
1600       }
1601     }
1602     color = PETSC_DRAW_CYAN;
1603     for (i=0,row=0; i<mbs; i++,row+=bs) {
1604       for (j=a->i[i]; j<a->i[i+1]; j++) {
1605         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1606         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1607         aa  = a->a + j*bs2;
1608         for (k=0; k<bs; k++) {
1609           for (l=0; l<bs; l++) {
1610             if (PetscRealPart(*aa++) != 0.) continue;
1611             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1612           }
1613         }
1614       }
1615     }
1616     color = PETSC_DRAW_RED;
1617     for (i=0,row=0; i<mbs; i++,row+=bs) {
1618       for (j=a->i[i]; j<a->i[i+1]; j++) {
1619         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1620         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1621         aa  = a->a + j*bs2;
1622         for (k=0; k<bs; k++) {
1623           for (l=0; l<bs; l++) {
1624             if (PetscRealPart(*aa++) <= 0.) continue;
1625             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1626           }
1627         }
1628       }
1629     }
1630   } else {
1631     /* use contour shading to indicate magnitude of values */
1632     /* first determine max of all nonzero values */
1633     PetscDraw popup;
1634     PetscReal scale,maxv = 0.0;
1635 
1636     for (i=0; i<a->nz*a->bs2; i++) {
1637       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1638     }
1639     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1640     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1641     if (popup) {
1642       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1643     }
1644     for (i=0,row=0; i<mbs; i++,row+=bs) {
1645       for (j=a->i[i]; j<a->i[i+1]; j++) {
1646         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1647         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1648         aa  = a->a + j*bs2;
1649         for (k=0; k<bs; k++) {
1650           for (l=0; l<bs; l++) {
1651             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1652             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1653           }
1654         }
1655       }
1656     }
1657   }
1658   PetscFunctionReturn(0);
1659 }
1660 
1661 #undef __FUNCT__
1662 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1663 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1664 {
1665   PetscErrorCode ierr;
1666   PetscReal      xl,yl,xr,yr,w,h;
1667   PetscDraw      draw;
1668   PetscBool      isnull;
1669 
1670   PetscFunctionBegin;
1671   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1672   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1673 
1674   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1675   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1676   xr  += w;    yr += h;  xl = -w;     yl = -h;
1677   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1678   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1679   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 #undef __FUNCT__
1684 #define __FUNCT__ "MatView_SeqBAIJ"
1685 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1686 {
1687   PetscErrorCode ierr;
1688   PetscBool      iascii,isbinary,isdraw;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1692   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1693   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1694   if (iascii) {
1695     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1696   } else if (isbinary) {
1697     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1698   } else if (isdraw) {
1699     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1700   } else {
1701     Mat B;
1702     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1703     ierr = MatView(B,viewer);CHKERRQ(ierr);
1704     ierr = MatDestroy(&B);CHKERRQ(ierr);
1705   }
1706   PetscFunctionReturn(0);
1707 }
1708 
1709 
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1712 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1713 {
1714   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1715   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1716   PetscInt    *ai = a->i,*ailen = a->ilen;
1717   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1718   MatScalar   *ap,*aa = a->a;
1719 
1720   PetscFunctionBegin;
1721   for (k=0; k<m; k++) { /* loop over rows */
1722     row = im[k]; brow = row/bs;
1723     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1724     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1725     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1726     nrow = ailen[brow];
1727     for (l=0; l<n; l++) { /* loop over columns */
1728       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1729       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1730       col  = in[l];
1731       bcol = col/bs;
1732       cidx = col%bs;
1733       ridx = row%bs;
1734       high = nrow;
1735       low  = 0; /* assume unsorted */
1736       while (high-low > 5) {
1737         t = (low+high)/2;
1738         if (rp[t] > bcol) high = t;
1739         else             low  = t;
1740       }
1741       for (i=low; i<high; i++) {
1742         if (rp[i] > bcol) break;
1743         if (rp[i] == bcol) {
1744           *v++ = ap[bs2*i+bs*cidx+ridx];
1745           goto finished;
1746         }
1747       }
1748       *v++ = 0.0;
1749 finished:;
1750     }
1751   }
1752   PetscFunctionReturn(0);
1753 }
1754 
1755 #undef __FUNCT__
1756 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1757 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1758 {
1759   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1760   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1761   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1762   PetscErrorCode    ierr;
1763   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1764   PetscBool         roworiented=a->roworiented;
1765   const PetscScalar *value     = v;
1766   MatScalar         *ap,*aa = a->a,*bap;
1767 
1768   PetscFunctionBegin;
1769   if (roworiented) {
1770     stepval = (n-1)*bs;
1771   } else {
1772     stepval = (m-1)*bs;
1773   }
1774   for (k=0; k<m; k++) { /* loop over added rows */
1775     row = im[k];
1776     if (row < 0) continue;
1777 #if defined(PETSC_USE_DEBUG)
1778     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1779 #endif
1780     rp   = aj + ai[row];
1781     ap   = aa + bs2*ai[row];
1782     rmax = imax[row];
1783     nrow = ailen[row];
1784     low  = 0;
1785     high = nrow;
1786     for (l=0; l<n; l++) { /* loop over added columns */
1787       if (in[l] < 0) continue;
1788 #if defined(PETSC_USE_DEBUG)
1789       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1790 #endif
1791       col = in[l];
1792       if (roworiented) {
1793         value = v + (k*(stepval+bs) + l)*bs;
1794       } else {
1795         value = v + (l*(stepval+bs) + k)*bs;
1796       }
1797       if (col <= lastcol) low = 0;
1798       else high = nrow;
1799       lastcol = col;
1800       while (high-low > 7) {
1801         t = (low+high)/2;
1802         if (rp[t] > col) high = t;
1803         else             low  = t;
1804       }
1805       for (i=low; i<high; i++) {
1806         if (rp[i] > col) break;
1807         if (rp[i] == col) {
1808           bap = ap +  bs2*i;
1809           if (roworiented) {
1810             if (is == ADD_VALUES) {
1811               for (ii=0; ii<bs; ii++,value+=stepval) {
1812                 for (jj=ii; jj<bs2; jj+=bs) {
1813                   bap[jj] += *value++;
1814                 }
1815               }
1816             } else {
1817               for (ii=0; ii<bs; ii++,value+=stepval) {
1818                 for (jj=ii; jj<bs2; jj+=bs) {
1819                   bap[jj] = *value++;
1820                 }
1821               }
1822             }
1823           } else {
1824             if (is == ADD_VALUES) {
1825               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1826                 for (jj=0; jj<bs; jj++) {
1827                   bap[jj] += value[jj];
1828                 }
1829                 bap += bs;
1830               }
1831             } else {
1832               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1833                 for (jj=0; jj<bs; jj++) {
1834                   bap[jj]  = value[jj];
1835                 }
1836                 bap += bs;
1837               }
1838             }
1839           }
1840           goto noinsert2;
1841         }
1842       }
1843       if (nonew == 1) goto noinsert2;
1844       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1845       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1846       N = nrow++ - 1; high++;
1847       /* shift up all the later entries in this row */
1848       for (ii=N; ii>=i; ii--) {
1849         rp[ii+1] = rp[ii];
1850         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1851       }
1852       if (N >= i) {
1853         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1854       }
1855       rp[i] = col;
1856       bap   = ap +  bs2*i;
1857       if (roworiented) {
1858         for (ii=0; ii<bs; ii++,value+=stepval) {
1859           for (jj=ii; jj<bs2; jj+=bs) {
1860             bap[jj] = *value++;
1861           }
1862         }
1863       } else {
1864         for (ii=0; ii<bs; ii++,value+=stepval) {
1865           for (jj=0; jj<bs; jj++) {
1866             *bap++ = *value++;
1867           }
1868         }
1869       }
1870 noinsert2:;
1871       low = i;
1872     }
1873     ailen[row] = nrow;
1874   }
1875   PetscFunctionReturn(0);
1876 }
1877 
1878 #undef __FUNCT__
1879 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1880 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1881 {
1882   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1883   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1884   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1885   PetscErrorCode ierr;
1886   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1887   MatScalar      *aa  = a->a,*ap;
1888   PetscReal      ratio=0.6;
1889 
1890   PetscFunctionBegin;
1891   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1892 
1893   if (m) rmax = ailen[0];
1894   for (i=1; i<mbs; i++) {
1895     /* move each row back by the amount of empty slots (fshift) before it*/
1896     fshift += imax[i-1] - ailen[i-1];
1897     rmax    = PetscMax(rmax,ailen[i]);
1898     if (fshift) {
1899       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1900       N  = ailen[i];
1901       for (j=0; j<N; j++) {
1902         ip[j-fshift] = ip[j];
1903 
1904         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1905       }
1906     }
1907     ai[i] = ai[i-1] + ailen[i-1];
1908   }
1909   if (mbs) {
1910     fshift += imax[mbs-1] - ailen[mbs-1];
1911     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1912   }
1913 
1914   /* reset ilen and imax for each row */
1915   a->nonzerorowcnt = 0;
1916   for (i=0; i<mbs; i++) {
1917     ailen[i] = imax[i] = ai[i+1] - ai[i];
1918     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1919   }
1920   a->nz = ai[mbs];
1921 
1922   /* diagonals may have moved, so kill the diagonal pointers */
1923   a->idiagvalid = PETSC_FALSE;
1924   if (fshift && a->diag) {
1925     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1926     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1927     a->diag = 0;
1928   }
1929   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1930   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1931   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1932   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1933 
1934   A->info.mallocs    += a->reallocs;
1935   a->reallocs         = 0;
1936   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1937 
1938   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 /*
1943    This function returns an array of flags which indicate the locations of contiguous
1944    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1945    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1946    Assume: sizes should be long enough to hold all the values.
1947 */
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1950 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1951 {
1952   PetscInt  i,j,k,row;
1953   PetscBool flg;
1954 
1955   PetscFunctionBegin;
1956   for (i=0,j=0; i<n; j++) {
1957     row = idx[i];
1958     if (row%bs!=0) { /* Not the begining of a block */
1959       sizes[j] = 1;
1960       i++;
1961     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1962       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1963       i++;
1964     } else { /* Begining of the block, so check if the complete block exists */
1965       flg = PETSC_TRUE;
1966       for (k=1; k<bs; k++) {
1967         if (row+k != idx[i+k]) { /* break in the block */
1968           flg = PETSC_FALSE;
1969           break;
1970         }
1971       }
1972       if (flg) { /* No break in the bs */
1973         sizes[j] = bs;
1974         i       += bs;
1975       } else {
1976         sizes[j] = 1;
1977         i++;
1978       }
1979     }
1980   }
1981   *bs_max = j;
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1987 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1988 {
1989   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1990   PetscErrorCode    ierr;
1991   PetscInt          i,j,k,count,*rows;
1992   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1993   PetscScalar       zero = 0.0;
1994   MatScalar         *aa;
1995   const PetscScalar *xx;
1996   PetscScalar       *bb;
1997 
1998   PetscFunctionBegin;
1999   /* fix right hand side if needed */
2000   if (x && b) {
2001     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2002     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2003     for (i=0; i<is_n; i++) {
2004       bb[is_idx[i]] = diag*xx[is_idx[i]];
2005     }
2006     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2007     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2008   }
2009 
2010   /* Make a copy of the IS and  sort it */
2011   /* allocate memory for rows,sizes */
2012   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2013 
2014   /* copy IS values to rows, and sort them */
2015   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2016   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2017 
2018   if (baij->keepnonzeropattern) {
2019     for (i=0; i<is_n; i++) sizes[i] = 1;
2020     bs_max          = is_n;
2021   } else {
2022     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2023     A->nonzerostate++;
2024   }
2025 
2026   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2027     row = rows[j];
2028     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2029     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2030     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2031     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2032       if (diag != (PetscScalar)0.0) {
2033         if (baij->ilen[row/bs] > 0) {
2034           baij->ilen[row/bs]       = 1;
2035           baij->j[baij->i[row/bs]] = row/bs;
2036 
2037           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2038         }
2039         /* Now insert all the diagonal values for this bs */
2040         for (k=0; k<bs; k++) {
2041           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2042         }
2043       } else { /* (diag == 0.0) */
2044         baij->ilen[row/bs] = 0;
2045       } /* end (diag == 0.0) */
2046     } else { /* (sizes[i] != bs) */
2047 #if defined(PETSC_USE_DEBUG)
2048       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2049 #endif
2050       for (k=0; k<count; k++) {
2051         aa[0] =  zero;
2052         aa   += bs;
2053       }
2054       if (diag != (PetscScalar)0.0) {
2055         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2056       }
2057     }
2058   }
2059 
2060   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2061   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2067 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2068 {
2069   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2070   PetscErrorCode    ierr;
2071   PetscInt          i,j,k,count;
2072   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2073   PetscScalar       zero = 0.0;
2074   MatScalar         *aa;
2075   const PetscScalar *xx;
2076   PetscScalar       *bb;
2077   PetscBool         *zeroed,vecs = PETSC_FALSE;
2078 
2079   PetscFunctionBegin;
2080   /* fix right hand side if needed */
2081   if (x && b) {
2082     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2083     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2084     vecs = PETSC_TRUE;
2085   }
2086 
2087   /* zero the columns */
2088   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2089   for (i=0; i<is_n; i++) {
2090     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2091     zeroed[is_idx[i]] = PETSC_TRUE;
2092   }
2093   for (i=0; i<A->rmap->N; i++) {
2094     if (!zeroed[i]) {
2095       row = i/bs;
2096       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2097         for (k=0; k<bs; k++) {
2098           col = bs*baij->j[j] + k;
2099           if (zeroed[col]) {
2100             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2101             if (vecs) bb[i] -= aa[0]*xx[col];
2102             aa[0] = 0.0;
2103           }
2104         }
2105       }
2106     } else if (vecs) bb[i] = diag*xx[i];
2107   }
2108   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2109   if (vecs) {
2110     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2112   }
2113 
2114   /* zero the rows */
2115   for (i=0; i<is_n; i++) {
2116     row   = is_idx[i];
2117     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2118     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2119     for (k=0; k<count; k++) {
2120       aa[0] =  zero;
2121       aa   += bs;
2122     }
2123     if (diag != (PetscScalar)0.0) {
2124       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2125     }
2126   }
2127   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2133 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2134 {
2135   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2136   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2137   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2138   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2139   PetscErrorCode ierr;
2140   PetscInt       ridx,cidx,bs2=a->bs2;
2141   PetscBool      roworiented=a->roworiented;
2142   MatScalar      *ap,value,*aa=a->a,*bap;
2143 
2144   PetscFunctionBegin;
2145   for (k=0; k<m; k++) { /* loop over added rows */
2146     row  = im[k];
2147     brow = row/bs;
2148     if (row < 0) continue;
2149 #if defined(PETSC_USE_DEBUG)
2150     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2151 #endif
2152     rp   = aj + ai[brow];
2153     ap   = aa + bs2*ai[brow];
2154     rmax = imax[brow];
2155     nrow = ailen[brow];
2156     low  = 0;
2157     high = nrow;
2158     for (l=0; l<n; l++) { /* loop over added columns */
2159       if (in[l] < 0) continue;
2160 #if defined(PETSC_USE_DEBUG)
2161       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2162 #endif
2163       col  = in[l]; bcol = col/bs;
2164       ridx = row % bs; cidx = col % bs;
2165       if (roworiented) {
2166         value = v[l + k*n];
2167       } else {
2168         value = v[k + l*m];
2169       }
2170       if (col <= lastcol) low = 0; else high = nrow;
2171       lastcol = col;
2172       while (high-low > 7) {
2173         t = (low+high)/2;
2174         if (rp[t] > bcol) high = t;
2175         else              low  = t;
2176       }
2177       for (i=low; i<high; i++) {
2178         if (rp[i] > bcol) break;
2179         if (rp[i] == bcol) {
2180           bap = ap +  bs2*i + bs*cidx + ridx;
2181           if (is == ADD_VALUES) *bap += value;
2182           else                  *bap  = value;
2183           goto noinsert1;
2184         }
2185       }
2186       if (nonew == 1) goto noinsert1;
2187       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2188       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2189       N = nrow++ - 1; high++;
2190       /* shift up all the later entries in this row */
2191       for (ii=N; ii>=i; ii--) {
2192         rp[ii+1] = rp[ii];
2193         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2194       }
2195       if (N>=i) {
2196         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2197       }
2198       rp[i]                      = bcol;
2199       ap[bs2*i + bs*cidx + ridx] = value;
2200       a->nz++;
2201       A->nonzerostate++;
2202 noinsert1:;
2203       low = i;
2204     }
2205     ailen[brow] = nrow;
2206   }
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2212 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2213 {
2214   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2215   Mat            outA;
2216   PetscErrorCode ierr;
2217   PetscBool      row_identity,col_identity;
2218 
2219   PetscFunctionBegin;
2220   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2221   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2222   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2223   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2224 
2225   outA            = inA;
2226   inA->factortype = MAT_FACTOR_LU;
2227 
2228   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2229 
2230   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2231   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2232   a->row = row;
2233   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2234   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2235   a->col = col;
2236 
2237   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2238   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2239   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2240   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2241 
2242   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2243   if (!a->solve_work) {
2244     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2245     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2246   }
2247   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2253 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2254 {
2255   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2256   PetscInt    i,nz,mbs;
2257 
2258   PetscFunctionBegin;
2259   nz  = baij->maxnz;
2260   mbs = baij->mbs;
2261   for (i=0; i<nz; i++) {
2262     baij->j[i] = indices[i];
2263   }
2264   baij->nz = nz;
2265   for (i=0; i<mbs; i++) {
2266     baij->ilen[i] = baij->imax[i];
2267   }
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2273 /*@
2274     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2275        in the matrix.
2276 
2277   Input Parameters:
2278 +  mat - the SeqBAIJ matrix
2279 -  indices - the column indices
2280 
2281   Level: advanced
2282 
2283   Notes:
2284     This can be called if you have precomputed the nonzero structure of the
2285   matrix and want to provide it to the matrix object to improve the performance
2286   of the MatSetValues() operation.
2287 
2288     You MUST have set the correct numbers of nonzeros per row in the call to
2289   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2290 
2291     MUST be called before any calls to MatSetValues();
2292 
2293 @*/
2294 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2295 {
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2300   PetscValidPointer(indices,2);
2301   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2307 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2308 {
2309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2310   PetscErrorCode ierr;
2311   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2312   PetscReal      atmp;
2313   PetscScalar    *x,zero = 0.0;
2314   MatScalar      *aa;
2315   PetscInt       ncols,brow,krow,kcol;
2316 
2317   PetscFunctionBegin;
2318   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2319   bs  = A->rmap->bs;
2320   aa  = a->a;
2321   ai  = a->i;
2322   aj  = a->j;
2323   mbs = a->mbs;
2324 
2325   ierr = VecSet(v,zero);CHKERRQ(ierr);
2326   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2327   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2328   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2329   for (i=0; i<mbs; i++) {
2330     ncols = ai[1] - ai[0]; ai++;
2331     brow  = bs*i;
2332     for (j=0; j<ncols; j++) {
2333       for (kcol=0; kcol<bs; kcol++) {
2334         for (krow=0; krow<bs; krow++) {
2335           atmp = PetscAbsScalar(*aa);aa++;
2336           row  = brow + krow;   /* row index */
2337           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2338         }
2339       }
2340       aj++;
2341     }
2342   }
2343   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatCopy_SeqBAIJ"
2349 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2350 {
2351   PetscErrorCode ierr;
2352 
2353   PetscFunctionBegin;
2354   /* If the two matrices have the same copy implementation, use fast copy. */
2355   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2356     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2357     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2358     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2359 
2360     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2361     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2362     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2363   } else {
2364     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2371 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2372 {
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2382 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2383 {
2384   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2385 
2386   PetscFunctionBegin;
2387   *array = a->a;
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2393 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2394 {
2395   PetscFunctionBegin;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2401 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2402 {
2403   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2404   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2405   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2406   PetscErrorCode ierr;
2407 
2408   PetscFunctionBegin;
2409   /* Set the number of nonzeros in the new matrix */
2410   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2416 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2417 {
2418   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2419   PetscErrorCode ierr;
2420   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2421   PetscBLASInt   one=1;
2422 
2423   PetscFunctionBegin;
2424   if (str == SAME_NONZERO_PATTERN) {
2425     PetscScalar  alpha = a;
2426     PetscBLASInt bnz;
2427     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2428     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2429     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2430   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2431     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2432   } else {
2433     Mat      B;
2434     PetscInt *nnz;
2435     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2436     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2437     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2438     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2439     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2440     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2441     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2442     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2443     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2444     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2445     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2446     ierr = PetscFree(nnz);CHKERRQ(ierr);
2447   }
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2454 {
2455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2457   MatScalar   *aa = a->a;
2458 
2459   PetscFunctionBegin;
2460   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2467 {
2468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2469   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2470   MatScalar   *aa = a->a;
2471 
2472   PetscFunctionBegin;
2473   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2479 /*
2480     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2481 */
2482 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2483 {
2484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2485   PetscErrorCode ierr;
2486   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2487   PetscInt       nz = a->i[m],row,*jj,mr,col;
2488 
2489   PetscFunctionBegin;
2490   *nn = n;
2491   if (!ia) PetscFunctionReturn(0);
2492   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2493   else {
2494     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2495     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2496     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2497     jj   = a->j;
2498     for (i=0; i<nz; i++) {
2499       collengths[jj[i]]++;
2500     }
2501     cia[0] = oshift;
2502     for (i=0; i<n; i++) {
2503       cia[i+1] = cia[i] + collengths[i];
2504     }
2505     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2506     jj   = a->j;
2507     for (row=0; row<m; row++) {
2508       mr = a->i[row+1] - a->i[row];
2509       for (i=0; i<mr; i++) {
2510         col = *jj++;
2511 
2512         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2513       }
2514     }
2515     ierr = PetscFree(collengths);CHKERRQ(ierr);
2516     *ia  = cia; *ja = cja;
2517   }
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2523 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2524 {
2525   PetscErrorCode ierr;
2526 
2527   PetscFunctionBegin;
2528   if (!ia) PetscFunctionReturn(0);
2529   ierr = PetscFree(*ia);CHKERRQ(ierr);
2530   ierr = PetscFree(*ja);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 /*
2535  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2536  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2537  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2538  */
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2541 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2542 {
2543   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2544   PetscErrorCode ierr;
2545   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2546   PetscInt       nz = a->i[m],row,*jj,mr,col;
2547   PetscInt       *cspidx;
2548 
2549   PetscFunctionBegin;
2550   *nn = n;
2551   if (!ia) PetscFunctionReturn(0);
2552 
2553   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2554   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2555   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2556   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2557   jj   = a->j;
2558   for (i=0; i<nz; i++) {
2559     collengths[jj[i]]++;
2560   }
2561   cia[0] = oshift;
2562   for (i=0; i<n; i++) {
2563     cia[i+1] = cia[i] + collengths[i];
2564   }
2565   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2566   jj   = a->j;
2567   for (row=0; row<m; row++) {
2568     mr = a->i[row+1] - a->i[row];
2569     for (i=0; i<mr; i++) {
2570       col = *jj++;
2571       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2572       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2573     }
2574   }
2575   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2576   *ia    = cia; *ja = cja;
2577   *spidx = cspidx;
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2583 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2584 {
2585   PetscErrorCode ierr;
2586 
2587   PetscFunctionBegin;
2588   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2589   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 /* -------------------------------------------------------------------*/
2594 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2595                                        MatGetRow_SeqBAIJ,
2596                                        MatRestoreRow_SeqBAIJ,
2597                                        MatMult_SeqBAIJ_N,
2598                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2599                                        MatMultTranspose_SeqBAIJ,
2600                                        MatMultTransposeAdd_SeqBAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                /* 10*/ 0,
2605                                        MatLUFactor_SeqBAIJ,
2606                                        0,
2607                                        0,
2608                                        MatTranspose_SeqBAIJ,
2609                                /* 15*/ MatGetInfo_SeqBAIJ,
2610                                        MatEqual_SeqBAIJ,
2611                                        MatGetDiagonal_SeqBAIJ,
2612                                        MatDiagonalScale_SeqBAIJ,
2613                                        MatNorm_SeqBAIJ,
2614                                /* 20*/ 0,
2615                                        MatAssemblyEnd_SeqBAIJ,
2616                                        MatSetOption_SeqBAIJ,
2617                                        MatZeroEntries_SeqBAIJ,
2618                                /* 24*/ MatZeroRows_SeqBAIJ,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                /* 29*/ MatSetUp_SeqBAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 34*/ MatDuplicate_SeqBAIJ,
2629                                        0,
2630                                        0,
2631                                        MatILUFactor_SeqBAIJ,
2632                                        0,
2633                                /* 39*/ MatAXPY_SeqBAIJ,
2634                                        MatGetSubMatrices_SeqBAIJ,
2635                                        MatIncreaseOverlap_SeqBAIJ,
2636                                        MatGetValues_SeqBAIJ,
2637                                        MatCopy_SeqBAIJ,
2638                                /* 44*/ 0,
2639                                        MatScale_SeqBAIJ,
2640                                        0,
2641                                        0,
2642                                        MatZeroRowsColumns_SeqBAIJ,
2643                                /* 49*/ 0,
2644                                        MatGetRowIJ_SeqBAIJ,
2645                                        MatRestoreRowIJ_SeqBAIJ,
2646                                        MatGetColumnIJ_SeqBAIJ,
2647                                        MatRestoreColumnIJ_SeqBAIJ,
2648                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        MatSetValuesBlocked_SeqBAIJ,
2653                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2654                                        MatDestroy_SeqBAIJ,
2655                                        MatView_SeqBAIJ,
2656                                        0,
2657                                        0,
2658                                /* 64*/ 0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2664                                        0,
2665                                        MatConvert_Basic,
2666                                        0,
2667                                        0,
2668                                /* 74*/ 0,
2669                                        MatFDColoringApply_BAIJ,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                /* 79*/ 0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        MatLoad_SeqBAIJ,
2678                                /* 84*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /* 89*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /* 94*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 99*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /*104*/ 0,
2699                                        MatRealPart_SeqBAIJ,
2700                                        MatImaginaryPart_SeqBAIJ,
2701                                        0,
2702                                        0,
2703                                /*109*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        MatMissingDiagonal_SeqBAIJ,
2708                                /*114*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /*119*/ 0,
2714                                        0,
2715                                        MatMultHermitianTranspose_SeqBAIJ,
2716                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2717                                        0,
2718                                /*124*/ 0,
2719                                        0,
2720                                        MatInvertBlockDiagonal_SeqBAIJ,
2721                                        0,
2722                                        0,
2723                                /*129*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                /*134*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*139*/ 0,
2734                                        0,
2735                                        0,
2736                                        MatFDColoringSetUp_SeqXAIJ
2737 };
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2741 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2742 {
2743   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2744   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2745   PetscErrorCode ierr;
2746 
2747   PetscFunctionBegin;
2748   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2749 
2750   /* allocate space for values if not already there */
2751   if (!aij->saved_values) {
2752     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2753     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2754   }
2755 
2756   /* copy values over */
2757   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2763 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2764 {
2765   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2766   PetscErrorCode ierr;
2767   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2768 
2769   PetscFunctionBegin;
2770   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2771   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2772 
2773   /* copy values over */
2774   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2779 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2783 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2784 {
2785   Mat_SeqBAIJ    *b;
2786   PetscErrorCode ierr;
2787   PetscInt       i,mbs,nbs,bs2;
2788   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2789 
2790   PetscFunctionBegin;
2791   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2792   if (nz == MAT_SKIP_ALLOCATION) {
2793     skipallocation = PETSC_TRUE;
2794     nz             = 0;
2795   }
2796 
2797   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2798   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2799   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2800   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2801 
2802   B->preallocated = PETSC_TRUE;
2803 
2804   mbs = B->rmap->n/bs;
2805   nbs = B->cmap->n/bs;
2806   bs2 = bs*bs;
2807 
2808   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2809 
2810   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2811   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2812   if (nnz) {
2813     for (i=0; i<mbs; i++) {
2814       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2815       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2816     }
2817   }
2818 
2819   b    = (Mat_SeqBAIJ*)B->data;
2820   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2821   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2822   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2823 
2824   if (!flg) {
2825     switch (bs) {
2826     case 1:
2827       B->ops->mult    = MatMult_SeqBAIJ_1;
2828       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2829       break;
2830     case 2:
2831       B->ops->mult    = MatMult_SeqBAIJ_2;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2833       break;
2834     case 3:
2835       B->ops->mult    = MatMult_SeqBAIJ_3;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2837       break;
2838     case 4:
2839       B->ops->mult    = MatMult_SeqBAIJ_4;
2840       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2841       break;
2842     case 5:
2843       B->ops->mult    = MatMult_SeqBAIJ_5;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2845       break;
2846     case 6:
2847       B->ops->mult    = MatMult_SeqBAIJ_6;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2849       break;
2850     case 7:
2851       B->ops->mult    = MatMult_SeqBAIJ_7;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2853       break;
2854     case 15:
2855       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2856       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2857       break;
2858     default:
2859       B->ops->mult    = MatMult_SeqBAIJ_N;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2861       break;
2862     }
2863   }
2864   B->ops->sor = MatSOR_SeqBAIJ;
2865   b->mbs = mbs;
2866   b->nbs = nbs;
2867   if (!skipallocation) {
2868     if (!b->imax) {
2869       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2870       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2871 
2872       b->free_imax_ilen = PETSC_TRUE;
2873     }
2874     /* b->ilen will count nonzeros in each block row so far. */
2875     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2876     if (!nnz) {
2877       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2878       else if (nz < 0) nz = 1;
2879       for (i=0; i<mbs; i++) b->imax[i] = nz;
2880       nz = nz*mbs;
2881     } else {
2882       nz = 0;
2883       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2884     }
2885 
2886     /* allocate the matrix space */
2887     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2888     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2889     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2890     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2891     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2892 
2893     b->singlemalloc = PETSC_TRUE;
2894     b->i[0]         = 0;
2895     for (i=1; i<mbs+1; i++) {
2896       b->i[i] = b->i[i-1] + b->imax[i-1];
2897     }
2898     b->free_a  = PETSC_TRUE;
2899     b->free_ij = PETSC_TRUE;
2900 #if defined(PETSC_THREADCOMM_ACTIVE)
2901     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2902 #endif
2903   } else {
2904     b->free_a  = PETSC_FALSE;
2905     b->free_ij = PETSC_FALSE;
2906   }
2907 
2908   b->bs2              = bs2;
2909   b->mbs              = mbs;
2910   b->nz               = 0;
2911   b->maxnz            = nz;
2912   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2913   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2914   PetscFunctionReturn(0);
2915 }
2916 
2917 #undef __FUNCT__
2918 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2919 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2920 {
2921   PetscInt       i,m,nz,nz_max=0,*nnz;
2922   PetscScalar    *values=0;
2923   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2924   PetscErrorCode ierr;
2925 
2926   PetscFunctionBegin;
2927   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2928   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2929   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2930   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2931   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2932   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2933   m    = B->rmap->n/bs;
2934 
2935   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2936   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2937   for (i=0; i<m; i++) {
2938     nz = ii[i+1]- ii[i];
2939     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2940     nz_max = PetscMax(nz_max, nz);
2941     nnz[i] = nz;
2942   }
2943   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2944   ierr = PetscFree(nnz);CHKERRQ(ierr);
2945 
2946   values = (PetscScalar*)V;
2947   if (!values) {
2948     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2949   }
2950   for (i=0; i<m; i++) {
2951     PetscInt          ncols  = ii[i+1] - ii[i];
2952     const PetscInt    *icols = jj + ii[i];
2953     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2954     if (!roworiented) {
2955       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2956     } else {
2957       PetscInt j;
2958       for (j=0; j<ncols; j++) {
2959         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2960         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2961       }
2962     }
2963   }
2964   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2965   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2966   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2967   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2968   PetscFunctionReturn(0);
2969 }
2970 
2971 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2972 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2973 #if defined(PETSC_HAVE_MUMPS)
2974 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2975 #endif
2976 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2977 
2978 /*MC
2979    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2980    block sparse compressed row format.
2981 
2982    Options Database Keys:
2983 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2984 
2985   Level: beginner
2986 
2987 .seealso: MatCreateSeqBAIJ()
2988 M*/
2989 
2990 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2991 
2992 #undef __FUNCT__
2993 #define __FUNCT__ "MatCreate_SeqBAIJ"
2994 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2995 {
2996   PetscErrorCode ierr;
2997   PetscMPIInt    size;
2998   Mat_SeqBAIJ    *b;
2999 
3000   PetscFunctionBegin;
3001   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3002   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3003 
3004   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3005   B->data = (void*)b;
3006   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3007 
3008   b->row          = 0;
3009   b->col          = 0;
3010   b->icol         = 0;
3011   b->reallocs     = 0;
3012   b->saved_values = 0;
3013 
3014   b->roworiented        = PETSC_TRUE;
3015   b->nonew              = 0;
3016   b->diag               = 0;
3017   B->spptr              = 0;
3018   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3019   b->keepnonzeropattern = PETSC_FALSE;
3020   b->xtoy               = 0;
3021   b->XtoY               = 0;
3022 
3023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3026 #if defined(PETSC_HAVE_MUMPS)
3027   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3028 #endif
3029   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3030   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3040   PetscFunctionReturn(0);
3041 }
3042 
3043 #undef __FUNCT__
3044 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3045 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3046 {
3047   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3048   PetscErrorCode ierr;
3049   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3050 
3051   PetscFunctionBegin;
3052   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3053 
3054   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3055     c->imax           = a->imax;
3056     c->ilen           = a->ilen;
3057     c->free_imax_ilen = PETSC_FALSE;
3058   } else {
3059     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3060     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3061     for (i=0; i<mbs; i++) {
3062       c->imax[i] = a->imax[i];
3063       c->ilen[i] = a->ilen[i];
3064     }
3065     c->free_imax_ilen = PETSC_TRUE;
3066   }
3067 
3068   /* allocate the matrix space */
3069   if (mallocmatspace) {
3070     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3071       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3072       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3073 
3074       c->i            = a->i;
3075       c->j            = a->j;
3076       c->singlemalloc = PETSC_FALSE;
3077       c->free_a       = PETSC_TRUE;
3078       c->free_ij      = PETSC_FALSE;
3079       c->parent       = A;
3080       C->preallocated = PETSC_TRUE;
3081       C->assembled    = PETSC_TRUE;
3082 
3083       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3084       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3085       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3086     } else {
3087       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3088       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3089 
3090       c->singlemalloc = PETSC_TRUE;
3091       c->free_a       = PETSC_TRUE;
3092       c->free_ij      = PETSC_TRUE;
3093 
3094       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3095       if (mbs > 0) {
3096         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3097         if (cpvalues == MAT_COPY_VALUES) {
3098           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3099         } else {
3100           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3101         }
3102       }
3103       C->preallocated = PETSC_TRUE;
3104       C->assembled    = PETSC_TRUE;
3105     }
3106   }
3107 
3108   c->roworiented = a->roworiented;
3109   c->nonew       = a->nonew;
3110 
3111   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3112   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3113 
3114   c->bs2         = a->bs2;
3115   c->mbs         = a->mbs;
3116   c->nbs         = a->nbs;
3117 
3118   if (a->diag) {
3119     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3120       c->diag      = a->diag;
3121       c->free_diag = PETSC_FALSE;
3122     } else {
3123       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3124       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3125       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3126       c->free_diag = PETSC_TRUE;
3127     }
3128   } else c->diag = 0;
3129 
3130   c->nz         = a->nz;
3131   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3132   c->solve_work = NULL;
3133   c->mult_work  = NULL;
3134   c->sor_workt  = NULL;
3135   c->sor_work   = NULL;
3136 
3137   c->compressedrow.use   = a->compressedrow.use;
3138   c->compressedrow.nrows = a->compressedrow.nrows;
3139   if (a->compressedrow.use) {
3140     i    = a->compressedrow.nrows;
3141     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3142     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3143     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3144     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3145   } else {
3146     c->compressedrow.use    = PETSC_FALSE;
3147     c->compressedrow.i      = NULL;
3148     c->compressedrow.rindex = NULL;
3149   }
3150   C->nonzerostate = A->nonzerostate;
3151 
3152   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3153   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3154   PetscFunctionReturn(0);
3155 }
3156 
3157 #undef __FUNCT__
3158 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3159 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3160 {
3161   PetscErrorCode ierr;
3162 
3163   PetscFunctionBegin;
3164   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3165   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3166   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3167   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3168   PetscFunctionReturn(0);
3169 }
3170 
3171 #undef __FUNCT__
3172 #define __FUNCT__ "MatLoad_SeqBAIJ"
3173 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3174 {
3175   Mat_SeqBAIJ    *a;
3176   PetscErrorCode ierr;
3177   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3178   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3179   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3180   PetscInt       *masked,nmask,tmp,bs2,ishift;
3181   PetscMPIInt    size;
3182   int            fd;
3183   PetscScalar    *aa;
3184   MPI_Comm       comm;
3185 
3186   PetscFunctionBegin;
3187   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3188   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3189   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3190   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3191   if (bs < 0) bs = 1;
3192   bs2  = bs*bs;
3193 
3194   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3195   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3196   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3197   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3198   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3199   M = header[1]; N = header[2]; nz = header[3];
3200 
3201   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3202   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3203 
3204   /*
3205      This code adds extra rows to make sure the number of rows is
3206     divisible by the blocksize
3207   */
3208   mbs        = M/bs;
3209   extra_rows = bs - M + bs*(mbs);
3210   if (extra_rows == bs) extra_rows = 0;
3211   else mbs++;
3212   if (extra_rows) {
3213     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3214   }
3215 
3216   /* Set global sizes if not already set */
3217   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3218     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3219   } else { /* Check if the matrix global sizes are correct */
3220     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3221     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3222       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3223     }
3224     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3225   }
3226 
3227   /* read in row lengths */
3228   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3229   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3230   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3231 
3232   /* read in column indices */
3233   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3234   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3235   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3236 
3237   /* loop over row lengths determining block row lengths */
3238   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3239   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3240   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3241   rowcount = 0;
3242   nzcount  = 0;
3243   for (i=0; i<mbs; i++) {
3244     nmask = 0;
3245     for (j=0; j<bs; j++) {
3246       kmax = rowlengths[rowcount];
3247       for (k=0; k<kmax; k++) {
3248         tmp = jj[nzcount++]/bs;
3249         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3250       }
3251       rowcount++;
3252     }
3253     browlengths[i] += nmask;
3254     /* zero out the mask elements we set */
3255     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3256   }
3257 
3258   /* Do preallocation  */
3259   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3260   a    = (Mat_SeqBAIJ*)newmat->data;
3261 
3262   /* set matrix "i" values */
3263   a->i[0] = 0;
3264   for (i=1; i<= mbs; i++) {
3265     a->i[i]      = a->i[i-1] + browlengths[i-1];
3266     a->ilen[i-1] = browlengths[i-1];
3267   }
3268   a->nz = 0;
3269   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3270 
3271   /* read in nonzero values */
3272   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3273   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3274   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3275 
3276   /* set "a" and "j" values into matrix */
3277   nzcount = 0; jcount = 0;
3278   for (i=0; i<mbs; i++) {
3279     nzcountb = nzcount;
3280     nmask    = 0;
3281     for (j=0; j<bs; j++) {
3282       kmax = rowlengths[i*bs+j];
3283       for (k=0; k<kmax; k++) {
3284         tmp = jj[nzcount++]/bs;
3285         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3286       }
3287     }
3288     /* sort the masked values */
3289     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3290 
3291     /* set "j" values into matrix */
3292     maskcount = 1;
3293     for (j=0; j<nmask; j++) {
3294       a->j[jcount++]  = masked[j];
3295       mask[masked[j]] = maskcount++;
3296     }
3297     /* set "a" values into matrix */
3298     ishift = bs2*a->i[i];
3299     for (j=0; j<bs; j++) {
3300       kmax = rowlengths[i*bs+j];
3301       for (k=0; k<kmax; k++) {
3302         tmp       = jj[nzcountb]/bs;
3303         block     = mask[tmp] - 1;
3304         point     = jj[nzcountb] - bs*tmp;
3305         idx       = ishift + bs2*block + j + bs*point;
3306         a->a[idx] = (MatScalar)aa[nzcountb++];
3307       }
3308     }
3309     /* zero out the mask elements we set */
3310     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3311   }
3312   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3313 
3314   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3315   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3316   ierr = PetscFree(aa);CHKERRQ(ierr);
3317   ierr = PetscFree(jj);CHKERRQ(ierr);
3318   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3319 
3320   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3321   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3322   PetscFunctionReturn(0);
3323 }
3324 
3325 #undef __FUNCT__
3326 #define __FUNCT__ "MatCreateSeqBAIJ"
3327 /*@C
3328    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3329    compressed row) format.  For good matrix assembly performance the
3330    user should preallocate the matrix storage by setting the parameter nz
3331    (or the array nnz).  By setting these parameters accurately, performance
3332    during matrix assembly can be increased by more than a factor of 50.
3333 
3334    Collective on MPI_Comm
3335 
3336    Input Parameters:
3337 +  comm - MPI communicator, set to PETSC_COMM_SELF
3338 .  bs - size of block
3339 .  m - number of rows
3340 .  n - number of columns
3341 .  nz - number of nonzero blocks  per block row (same for all rows)
3342 -  nnz - array containing the number of nonzero blocks in the various block rows
3343          (possibly different for each block row) or NULL
3344 
3345    Output Parameter:
3346 .  A - the matrix
3347 
3348    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3349    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3350    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3351 
3352    Options Database Keys:
3353 .   -mat_no_unroll - uses code that does not unroll the loops in the
3354                      block calculations (much slower)
3355 .    -mat_block_size - size of the blocks to use
3356 
3357    Level: intermediate
3358 
3359    Notes:
3360    The number of rows and columns must be divisible by blocksize.
3361 
3362    If the nnz parameter is given then the nz parameter is ignored
3363 
3364    A nonzero block is any block that as 1 or more nonzeros in it
3365 
3366    The block AIJ format is fully compatible with standard Fortran 77
3367    storage.  That is, the stored row and column indices can begin at
3368    either one (as in Fortran) or zero.  See the users' manual for details.
3369 
3370    Specify the preallocated storage with either nz or nnz (not both).
3371    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3372    allocation.  See Users-Manual: ch_mat for details.
3373    matrices.
3374 
3375 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3376 @*/
3377 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3378 {
3379   PetscErrorCode ierr;
3380 
3381   PetscFunctionBegin;
3382   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3383   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3384   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3385   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3386   PetscFunctionReturn(0);
3387 }
3388 
3389 #undef __FUNCT__
3390 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3391 /*@C
3392    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3393    per row in the matrix. For good matrix assembly performance the
3394    user should preallocate the matrix storage by setting the parameter nz
3395    (or the array nnz).  By setting these parameters accurately, performance
3396    during matrix assembly can be increased by more than a factor of 50.
3397 
3398    Collective on MPI_Comm
3399 
3400    Input Parameters:
3401 +  B - the matrix
3402 .  bs - size of block
3403 .  nz - number of block nonzeros per block row (same for all rows)
3404 -  nnz - array containing the number of block nonzeros in the various block rows
3405          (possibly different for each block row) or NULL
3406 
3407    Options Database Keys:
3408 .   -mat_no_unroll - uses code that does not unroll the loops in the
3409                      block calculations (much slower)
3410 .    -mat_block_size - size of the blocks to use
3411 
3412    Level: intermediate
3413 
3414    Notes:
3415    If the nnz parameter is given then the nz parameter is ignored
3416 
3417    You can call MatGetInfo() to get information on how effective the preallocation was;
3418    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3419    You can also run with the option -info and look for messages with the string
3420    malloc in them to see if additional memory allocation was needed.
3421 
3422    The block AIJ format is fully compatible with standard Fortran 77
3423    storage.  That is, the stored row and column indices can begin at
3424    either one (as in Fortran) or zero.  See the users' manual for details.
3425 
3426    Specify the preallocated storage with either nz or nnz (not both).
3427    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3428    allocation.  See Users-Manual: ch_mat for details.
3429 
3430 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3431 @*/
3432 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3433 {
3434   PetscErrorCode ierr;
3435 
3436   PetscFunctionBegin;
3437   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3438   PetscValidType(B,1);
3439   PetscValidLogicalCollectiveInt(B,bs,2);
3440   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3441   PetscFunctionReturn(0);
3442 }
3443 
3444 #undef __FUNCT__
3445 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3446 /*@C
3447    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3448    (the default sequential PETSc format).
3449 
3450    Collective on MPI_Comm
3451 
3452    Input Parameters:
3453 +  B - the matrix
3454 .  i - the indices into j for the start of each local row (starts with zero)
3455 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3456 -  v - optional values in the matrix
3457 
3458    Level: developer
3459 
3460    Notes:
3461    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3462    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3463    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3464    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3465    block column and the second index is over columns within a block.
3466 
3467 .keywords: matrix, aij, compressed row, sparse
3468 
3469 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3470 @*/
3471 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3472 {
3473   PetscErrorCode ierr;
3474 
3475   PetscFunctionBegin;
3476   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3477   PetscValidType(B,1);
3478   PetscValidLogicalCollectiveInt(B,bs,2);
3479   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3480   PetscFunctionReturn(0);
3481 }
3482 
3483 
3484 #undef __FUNCT__
3485 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3486 /*@
3487      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3488 
3489      Collective on MPI_Comm
3490 
3491    Input Parameters:
3492 +  comm - must be an MPI communicator of size 1
3493 .  bs - size of block
3494 .  m - number of rows
3495 .  n - number of columns
3496 .  i - row indices
3497 .  j - column indices
3498 -  a - matrix values
3499 
3500    Output Parameter:
3501 .  mat - the matrix
3502 
3503    Level: advanced
3504 
3505    Notes:
3506        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3507     once the matrix is destroyed
3508 
3509        You cannot set new nonzero locations into this matrix, that will generate an error.
3510 
3511        The i and j indices are 0 based
3512 
3513        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3514 
3515       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3516       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3517       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3518       with column-major ordering within blocks.
3519 
3520 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3521 
3522 @*/
3523 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3524 {
3525   PetscErrorCode ierr;
3526   PetscInt       ii;
3527   Mat_SeqBAIJ    *baij;
3528 
3529   PetscFunctionBegin;
3530   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3531   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3532 
3533   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3534   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3535   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3536   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3537   baij = (Mat_SeqBAIJ*)(*mat)->data;
3538   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3539   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3540 
3541   baij->i = i;
3542   baij->j = j;
3543   baij->a = a;
3544 
3545   baij->singlemalloc = PETSC_FALSE;
3546   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3547   baij->free_a       = PETSC_FALSE;
3548   baij->free_ij      = PETSC_FALSE;
3549 
3550   for (ii=0; ii<m; ii++) {
3551     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3552 #if defined(PETSC_USE_DEBUG)
3553     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3554 #endif
3555   }
3556 #if defined(PETSC_USE_DEBUG)
3557   for (ii=0; ii<baij->i[m]; ii++) {
3558     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3559     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3560   }
3561 #endif
3562 
3563   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3564   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565   PetscFunctionReturn(0);
3566 }
3567