xref: /petsc/src/mat/impls/baij/seq/baij.c (revision df6f6c19b095f1abc1ac27fcbb9c2092a4a2a65e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
156   }
157   if (!a->sor_workt) {
158     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
159   }
160   if (!a->sor_work) {
161     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
162   }
163   work = a->mult_work;
164   t    = a->sor_workt;
165   w    = a->sor_work;
166 
167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
168   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
169 
170   if (flag & SOR_ZERO_INITIAL_GUESS) {
171     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
172       switch (bs) {
173       case 1:
174         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
175         t[0] = b[0];
176         i2     = 1;
177         idiag += 1;
178         for (i=1; i<m; i++) {
179           v  = aa + ai[i];
180           vi = aj + ai[i];
181           nz = diag[i] - ai[i];
182           s[0] = b[i2];
183           for (j=0; j<nz; j++) {
184             xw[0] = x[vi[j]];
185             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
186           }
187           t[i2] = s[0];
188           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
189           x[i2]  = xw[0];
190           idiag += 1;
191           i2    += 1;
192         }
193         break;
194       case 2:
195         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
196         t[0] = b[0]; t[1] = b[1];
197         i2     = 2;
198         idiag += 4;
199         for (i=1; i<m; i++) {
200           v  = aa + 4*ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2]; s[1] = b[i2+1];
204           for (j=0; j<nz; j++) {
205             idx = 2*vi[j];
206             it  = 4*j;
207             xw[0] = x[idx]; xw[1] = x[1+idx];
208             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
209           }
210           t[i2] = s[0]; t[i2+1] = s[1];
211           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
212           x[i2]   = xw[0]; x[i2+1] = xw[1];
213           idiag  += 4;
214           i2     += 2;
215         }
216         break;
217       case 3:
218         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
219         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
220         i2     = 3;
221         idiag += 9;
222         for (i=1; i<m; i++) {
223           v  = aa + 9*ai[i];
224           vi = aj + ai[i];
225           nz = diag[i] - ai[i];
226           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
227           while (nz--) {
228             idx = 3*(*vi++);
229             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
230             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
231             v  += 9;
232           }
233           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
234           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
235           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
236           idiag  += 9;
237           i2     += 3;
238         }
239         break;
240       case 4:
241         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
242         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
243         i2     = 4;
244         idiag += 16;
245         for (i=1; i<m; i++) {
246           v  = aa + 16*ai[i];
247           vi = aj + ai[i];
248           nz = diag[i] - ai[i];
249           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
250           while (nz--) {
251             idx = 4*(*vi++);
252             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
253             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
254             v  += 16;
255           }
256           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
257           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
258           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
259           idiag  += 16;
260           i2     += 4;
261         }
262         break;
263       case 5:
264         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
266         i2     = 5;
267         idiag += 25;
268         for (i=1; i<m; i++) {
269           v  = aa + 25*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
273           while (nz--) {
274             idx = 5*(*vi++);
275             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
276             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
277             v  += 25;
278           }
279           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
280           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
281           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
282           idiag  += 25;
283           i2     += 5;
284         }
285         break;
286       case 6:
287         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
289         i2     = 6;
290         idiag += 36;
291         for (i=1; i<m; i++) {
292           v  = aa + 36*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
296           while (nz--) {
297             idx = 6*(*vi++);
298             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
300             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
301             v  += 36;
302           }
303           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
304           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
305           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
307           idiag  += 36;
308           i2     += 6;
309         }
310         break;
311       case 7:
312         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
314         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
315         i2     = 7;
316         idiag += 49;
317         for (i=1; i<m; i++) {
318           v  = aa + 49*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
322           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
323           while (nz--) {
324             idx = 7*(*vi++);
325             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
326             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
327             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
328             v  += 49;
329           }
330           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
331           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
332           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
333           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
334           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
335           idiag  += 49;
336           i2     += 7;
337         }
338         break;
339       default:
340         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
341         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
342         i2     = bs;
343         idiag += bs2;
344         for (i=1; i<m; i++) {
345           v  = aa + bs2*ai[i];
346           vi = aj + ai[i];
347           nz = diag[i] - ai[i];
348 
349           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
350           /* copy all rows of x that are needed into contiguous space */
351           workt = work;
352           for (j=0; j<nz; j++) {
353             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
354             workt += bs;
355           }
356           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
357           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
358           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
359 
360           idiag += bs2;
361           i2    += bs;
362         }
363         break;
364       }
365       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
366       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
367       xb = t;
368     }
369     else xb = b;
370     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
371       idiag = a->idiag+bs2*(a->mbs-1);
372       i2 = bs * (m-1);
373       switch (bs) {
374       case 1:
375         s[0]  = xb[i2];
376         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
377         x[i2] = xw[0];
378         i2   -= 1;
379         for (i=m-2; i>=0; i--) {
380           v  = aa + (diag[i]+1);
381           vi = aj + diag[i] + 1;
382           nz = ai[i+1] - diag[i] - 1;
383           s[0] = xb[i2];
384           for (j=0; j<nz; j++) {
385             xw[0] = x[vi[j]];
386             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
387           }
388           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
389           x[i2]  = xw[0];
390           idiag -= 1;
391           i2    -= 1;
392         }
393         break;
394       case 2:
395         s[0]  = xb[i2]; s[1] = xb[i2+1];
396         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
397         x[i2] = xw[0]; x[i2+1] = xw[1];
398         i2    -= 2;
399         idiag -= 4;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + 4*(diag[i] + 1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2]; s[1] = xb[i2+1];
405           for (j=0; j<nz; j++) {
406             idx = 2*vi[j];
407             it  = 4*j;
408             xw[0] = x[idx]; xw[1] = x[1+idx];
409             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
410           }
411           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
412           x[i2]   = xw[0]; x[i2+1] = xw[1];
413           idiag  -= 4;
414           i2     -= 2;
415         }
416         break;
417       case 3:
418         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
419         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
420         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
421         i2    -= 3;
422         idiag -= 9;
423         for (i=m-2; i>=0; i--) {
424           v  = aa + 9*(diag[i]+1);
425           vi = aj + diag[i] + 1;
426           nz = ai[i+1] - diag[i] - 1;
427           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
428           while (nz--) {
429             idx = 3*(*vi++);
430             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
431             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
432             v  += 9;
433           }
434           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
435           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
436           idiag  -= 9;
437           i2     -= 3;
438         }
439         break;
440       case 4:
441         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
442         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
443         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
444         i2    -= 4;
445         idiag -= 16;
446         for (i=m-2; i>=0; i--) {
447           v  = aa + 16*(diag[i]+1);
448           vi = aj + diag[i] + 1;
449           nz = ai[i+1] - diag[i] - 1;
450           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
451           while (nz--) {
452             idx = 4*(*vi++);
453             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
454             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
455             v  += 16;
456           }
457           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
458           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
459           idiag  -= 16;
460           i2     -= 4;
461         }
462         break;
463       case 5:
464         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
465         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
467         i2    -= 5;
468         idiag -= 25;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 25*(diag[i]+1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
474           while (nz--) {
475             idx = 5*(*vi++);
476             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
477             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
478             v  += 25;
479           }
480           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
481           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
482           idiag  -= 25;
483           i2     -= 5;
484         }
485         break;
486       case 6:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
488         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
490         i2    -= 6;
491         idiag -= 36;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 36*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
497           while (nz--) {
498             idx = 6*(*vi++);
499             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
501             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
502             v  += 36;
503           }
504           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
505           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
506           idiag  -= 36;
507           i2     -= 6;
508         }
509         break;
510       case 7:
511         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
512         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
513         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
514         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
515         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
516         i2    -= 7;
517         idiag -= 49;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 49*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
523           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
524           while (nz--) {
525             idx = 7*(*vi++);
526             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
527             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
528             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
529             v  += 49;
530           }
531           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
532           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
533           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
534           idiag  -= 49;
535           i2     -= 7;
536         }
537         break;
538       default:
539         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
540         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
541         i2    -= bs;
542         idiag -= bs2;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + bs2*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547 
548           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
549           /* copy all rows of x that are needed into contiguous space */
550           workt = work;
551           for (j=0; j<nz; j++) {
552             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
553             workt += bs;
554           }
555           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
556           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
557 
558           idiag -= bs2;
559           i2    -= bs;
560         }
561         break;
562       }
563       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
564     }
565     its--;
566   }
567   while (its--) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       idiag = a->idiag;
570       i2 = 0;
571       switch (bs) {
572       case 1:
573         for (i=0; i<m; i++) {
574           v  = aa + ai[i];
575           vi = aj + ai[i];
576           nz = ai[i+1] - ai[i];
577           s[0] = b[i2];
578           for (j=0; j<nz; j++) {
579             xw[0] = x[vi[j]];
580             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
581           }
582           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
583           x[i2] += xw[0];
584           idiag += 1;
585           i2    += 1;
586         }
587         break;
588       case 2:
589         for (i=0; i<m; i++) {
590           v  = aa + 4*ai[i];
591           vi = aj + ai[i];
592           nz = ai[i+1] - ai[i];
593           s[0] = b[i2]; s[1] = b[i2+1];
594           for (j=0; j<nz; j++) {
595             idx = 2*vi[j];
596             it  = 4*j;
597             xw[0] = x[idx]; xw[1] = x[1+idx];
598             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
599           }
600           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
601           x[i2]  += xw[0]; x[i2+1] += xw[1];
602           idiag  += 4;
603           i2     += 2;
604         }
605         break;
606       case 3:
607         for (i=0; i<m; i++) {
608           v  = aa + 9*ai[i];
609           vi = aj + ai[i];
610           nz = ai[i+1] - ai[i];
611           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
612           while (nz--) {
613             idx = 3*(*vi++);
614             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
615             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
616             v  += 9;
617           }
618           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
619           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
620           idiag  += 9;
621           i2     += 3;
622         }
623         break;
624       case 4:
625         for (i=0; i<m; i++) {
626           v  = aa + 16*ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
630           while (nz--) {
631             idx = 4*(*vi++);
632             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
633             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
634             v  += 16;
635           }
636           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
637           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
638           idiag  += 16;
639           i2     += 4;
640         }
641         break;
642       case 5:
643         for (i=0; i<m; i++) {
644           v  = aa + 25*ai[i];
645           vi = aj + ai[i];
646           nz = ai[i+1] - ai[i];
647           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
648           while (nz--) {
649             idx = 5*(*vi++);
650             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
651             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
652             v  += 25;
653           }
654           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
655           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
656           idiag  += 25;
657           i2     += 5;
658         }
659         break;
660       case 6:
661         for (i=0; i<m; i++) {
662           v  = aa + 36*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
666           while (nz--) {
667             idx = 6*(*vi++);
668             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
669             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
671             v  += 36;
672           }
673           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
675           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
676           idiag  += 36;
677           i2     += 6;
678         }
679         break;
680       case 7:
681         for (i=0; i<m; i++) {
682           v  = aa + 49*ai[i];
683           vi = aj + ai[i];
684           nz = ai[i+1] - ai[i];
685           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
686           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
687           while (nz--) {
688             idx = 7*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
692             v  += 49;
693           }
694           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
695           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
697           idiag  += 49;
698           i2     += 7;
699         }
700         break;
701       default:
702         for (i=0; i<m; i++) {
703           v  = aa + bs2*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706 
707           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
708           /* copy all rows of x that are needed into contiguous space */
709           workt = work;
710           for (j=0; j<nz; j++) {
711             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
712             workt += bs;
713           }
714           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
715           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
716 
717           idiag += bs2;
718           i2    += bs;
719         }
720         break;
721       }
722       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
723     }
724     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
725       idiag = a->idiag+bs2*(a->mbs-1);
726       i2 = bs * (m-1);
727       switch (bs) {
728       case 1:
729         for (i=m-1; i>=0; i--) {
730           v  = aa + ai[i];
731           vi = aj + ai[i];
732           nz = ai[i+1] - ai[i];
733           s[0] = b[i2];
734           for (j=0; j<nz; j++) {
735             xw[0] = x[vi[j]];
736             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
737           }
738           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
739           x[i2] += xw[0];
740           idiag -= 1;
741           i2    -= 1;
742         }
743         break;
744       case 2:
745         for (i=m-1; i>=0; i--) {
746           v  = aa + 4*ai[i];
747           vi = aj + ai[i];
748           nz = ai[i+1] - ai[i];
749           s[0] = b[i2]; s[1] = b[i2+1];
750           for (j=0; j<nz; j++) {
751             idx = 2*vi[j];
752             it  = 4*j;
753             xw[0] = x[idx]; xw[1] = x[1+idx];
754             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
755           }
756           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
757           x[i2]  += xw[0]; x[i2+1] += xw[1];
758           idiag  -= 4;
759           i2     -= 2;
760         }
761         break;
762       case 3:
763         for (i=m-1; i>=0; i--) {
764           v  = aa + 9*ai[i];
765           vi = aj + ai[i];
766           nz = ai[i+1] - ai[i];
767           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
768           while (nz--) {
769             idx = 3*(*vi++);
770             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
771             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
772             v  += 9;
773           }
774           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
775           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
776           idiag  -= 9;
777           i2     -= 3;
778         }
779         break;
780       case 4:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + 16*ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
786           while (nz--) {
787             idx = 4*(*vi++);
788             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
789             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
790             v  += 16;
791           }
792           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
793           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
794           idiag  -= 16;
795           i2     -= 4;
796         }
797         break;
798       case 5:
799         for (i=m-1; i>=0; i--) {
800           v  = aa + 25*ai[i];
801           vi = aj + ai[i];
802           nz = ai[i+1] - ai[i];
803           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
804           while (nz--) {
805             idx = 5*(*vi++);
806             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
807             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
808             v  += 25;
809           }
810           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
811           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
812           idiag  -= 25;
813           i2     -= 5;
814         }
815         break;
816       case 6:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 36*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
822           while (nz--) {
823             idx = 6*(*vi++);
824             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
825             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
827             v  += 36;
828           }
829           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
831           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
832           idiag  -= 36;
833           i2     -= 6;
834         }
835         break;
836       case 7:
837         for (i=m-1; i>=0; i--) {
838           v  = aa + 49*ai[i];
839           vi = aj + ai[i];
840           nz = ai[i+1] - ai[i];
841           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
842           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
843           while (nz--) {
844             idx = 7*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
848             v  += 49;
849           }
850           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
851           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
853           idiag  -= 49;
854           i2     -= 7;
855         }
856         break;
857       default:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + bs2*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862 
863           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           /* copy all rows of x that are needed into contiguous space */
865           workt = work;
866           for (j=0; j<nz; j++) {
867             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
868             workt += bs;
869           }
870           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
871           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
872 
873           idiag -= bs2;
874           i2    -= bs;
875         }
876         break;
877       }
878       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
879     }
880   }
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 
887 /*
888     Special version for direct calls from Fortran (Used in PETSc-fun3d)
889 */
890 #if defined(PETSC_HAVE_FORTRAN_CAPS)
891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
893 #define matsetvaluesblocked4_ matsetvaluesblocked4
894 #endif
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "matsetvaluesblocked4_"
898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
899 {
900   Mat               A  = *AA;
901   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
902   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
903   PetscInt          *ai    =a->i,*ailen=a->ilen;
904   PetscInt          *aj    =a->j,stepval,lastcol = -1;
905   const PetscScalar *value = v;
906   MatScalar         *ap,*aa = a->a,*bap;
907 
908   PetscFunctionBegin;
909   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
910   stepval = (n-1)*4;
911   for (k=0; k<m; k++) { /* loop over added rows */
912     row  = im[k];
913     rp   = aj + ai[row];
914     ap   = aa + 16*ai[row];
915     nrow = ailen[row];
916     low  = 0;
917     high = nrow;
918     for (l=0; l<n; l++) { /* loop over added columns */
919       col = in[l];
920       if (col <= lastcol)  low = 0;
921       else                high = nrow;
922       lastcol = col;
923       value   = v + k*(stepval+4 + l)*4;
924       while (high-low > 7) {
925         t = (low+high)/2;
926         if (rp[t] > col) high = t;
927         else             low  = t;
928       }
929       for (i=low; i<high; i++) {
930         if (rp[i] > col) break;
931         if (rp[i] == col) {
932           bap = ap +  16*i;
933           for (ii=0; ii<4; ii++,value+=stepval) {
934             for (jj=ii; jj<16; jj+=4) {
935               bap[jj] += *value++;
936             }
937           }
938           goto noinsert2;
939         }
940       }
941       N = nrow++ - 1;
942       high++; /* added new column index thus must search to one higher than before */
943       /* shift up all the later entries in this row */
944       for (ii=N; ii>=i; ii--) {
945         rp[ii+1] = rp[ii];
946         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
947       }
948       if (N >= i) {
949         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
950       }
951       rp[i] = col;
952       bap   = ap +  16*i;
953       for (ii=0; ii<4; ii++,value+=stepval) {
954         for (jj=ii; jj<16; jj+=4) {
955           bap[jj] = *value++;
956         }
957       }
958       noinsert2:;
959       low = i;
960     }
961     ailen[row] = nrow;
962   }
963   PetscFunctionReturnVoid();
964 }
965 
966 #if defined(PETSC_HAVE_FORTRAN_CAPS)
967 #define matsetvalues4_ MATSETVALUES4
968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
969 #define matsetvalues4_ matsetvalues4
970 #endif
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatSetValues4_"
974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
975 {
976   Mat         A  = *AA;
977   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
979   PetscInt    *ai=a->i,*ailen=a->ilen;
980   PetscInt    *aj=a->j,brow,bcol;
981   PetscInt    ridx,cidx,lastcol = -1;
982   MatScalar   *ap,value,*aa=a->a,*bap;
983 
984   PetscFunctionBegin;
985   for (k=0; k<m; k++) { /* loop over added rows */
986     row  = im[k]; brow = row/4;
987     rp   = aj + ai[brow];
988     ap   = aa + 16*ai[brow];
989     nrow = ailen[brow];
990     low  = 0;
991     high = nrow;
992     for (l=0; l<n; l++) { /* loop over added columns */
993       col   = in[l]; bcol = col/4;
994       ridx  = row % 4; cidx = col % 4;
995       value = v[l + k*n];
996       if (col <= lastcol)  low = 0;
997       else                high = nrow;
998       lastcol = col;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap   = ap +  16*i + 4*cidx + ridx;
1008           *bap += value;
1009           goto noinsert1;
1010         }
1011       }
1012       N = nrow++ - 1;
1013       high++; /* added new column thus must search to one higher than before */
1014       /* shift up all the later entries in this row */
1015       for (ii=N; ii>=i; ii--) {
1016         rp[ii+1] = rp[ii];
1017         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1018       }
1019       if (N>=i) {
1020         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1021       }
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1067 {
1068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscInt       i,j,m = a->mbs;
1071 
1072   PetscFunctionBegin;
1073   if (!a->diag) {
1074     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1075     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1076     a->free_diag = PETSC_TRUE;
1077   }
1078   for (i=0; i<m; i++) {
1079     a->diag[i] = a->i[i+1];
1080     for (j=a->i[i]; j<a->i[i+1]; j++) {
1081       if (a->j[j] == i) {
1082         a->diag[i] = j;
1083         break;
1084       }
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1094 {
1095   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1098   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1099 
1100   PetscFunctionBegin;
1101   *nn = n;
1102   if (!ia) PetscFunctionReturn(0);
1103   if (symmetric) {
1104     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1105     nz   = tia[n];
1106   } else {
1107     tia = a->i; tja = a->j;
1108   }
1109 
1110   if (!blockcompressed && bs > 1) {
1111     (*nn) *= bs;
1112     /* malloc & create the natural set of indices */
1113     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1114     if (n) {
1115       (*ia)[0] = 0;
1116       for (j=1; j<bs; j++) {
1117         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1118       }
1119     }
1120 
1121     for (i=1; i<n; i++) {
1122       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1123       for (j=1; j<bs; j++) {
1124         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1125       }
1126     }
1127     if (n) {
1128       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1129     }
1130 
1131     if (inja) {
1132       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1133       cnt = 0;
1134       for (i=0; i<n; i++) {
1135         for (j=0; j<bs; j++) {
1136           for (k=tia[i]; k<tia[i+1]; k++) {
1137             for (l=0; l<bs; l++) {
1138               (*ja)[cnt++] = bs*tja[k] + l;
1139             }
1140           }
1141         }
1142       }
1143     }
1144 
1145     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1146       ierr = PetscFree(tia);CHKERRQ(ierr);
1147       ierr = PetscFree(tja);CHKERRQ(ierr);
1148     }
1149   } else if (oshift == 1) {
1150     if (symmetric) {
1151       nz = tia[A->rmap->n/bs];
1152       /*  add 1 to i and j indices */
1153       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1154       *ia = tia;
1155       if (ja) {
1156         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1157         *ja = tja;
1158       }
1159     } else {
1160       nz = a->i[A->rmap->n/bs];
1161       /* malloc space and  add 1 to i and j indices */
1162       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1163       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1164       if (ja) {
1165         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1166         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1167       }
1168     }
1169   } else {
1170     *ia = tia;
1171     if (ja) *ja = tja;
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1179 {
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   if (!ia) PetscFunctionReturn(0);
1184   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1185     ierr = PetscFree(*ia);CHKERRQ(ierr);
1186     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1194 {
1195   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199 #if defined(PETSC_USE_LOG)
1200   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1201 #endif
1202   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1205   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1206   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1207   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1208   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1209   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1210   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1211   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1212   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1213   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1214   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1215 
1216   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1217   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1218   ierr = PetscFree(A->data);CHKERRQ(ierr);
1219 
1220   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1236 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1237 {
1238   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   switch (op) {
1243   case MAT_ROW_ORIENTED:
1244     a->roworiented = flg;
1245     break;
1246   case MAT_KEEP_NONZERO_PATTERN:
1247     a->keepnonzeropattern = flg;
1248     break;
1249   case MAT_NEW_NONZERO_LOCATIONS:
1250     a->nonew = (flg ? 0 : 1);
1251     break;
1252   case MAT_NEW_NONZERO_LOCATION_ERR:
1253     a->nonew = (flg ? -1 : 0);
1254     break;
1255   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1256     a->nonew = (flg ? -2 : 0);
1257     break;
1258   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1259     a->nounused = (flg ? -1 : 0);
1260     break;
1261   case MAT_NEW_DIAGONALS:
1262   case MAT_IGNORE_OFF_PROC_ENTRIES:
1263   case MAT_USE_HASH_TABLE:
1264     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1265     break;
1266   case MAT_SPD:
1267   case MAT_SYMMETRIC:
1268   case MAT_STRUCTURALLY_SYMMETRIC:
1269   case MAT_HERMITIAN:
1270   case MAT_SYMMETRY_ETERNAL:
1271     /* These options are handled directly by MatSetOption() */
1272     break;
1273   default:
1274     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1282 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1283 {
1284   PetscErrorCode ierr;
1285   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1286   MatScalar      *aa_i;
1287   PetscScalar    *v_i;
1288 
1289   PetscFunctionBegin;
1290   bs  = A->rmap->bs;
1291   bs2 = bs*bs;
1292   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1293 
1294   bn  = row/bs;   /* Block number */
1295   bp  = row % bs; /* Block Position */
1296   M   = ai[bn+1] - ai[bn];
1297   *nz = bs*M;
1298 
1299   if (v) {
1300     *v = 0;
1301     if (*nz) {
1302       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1303       for (i=0; i<M; i++) { /* for each block in the block row */
1304         v_i  = *v + i*bs;
1305         aa_i = aa + bs2*(ai[bn] + i);
1306         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1307       }
1308     }
1309   }
1310 
1311   if (idx) {
1312     *idx = 0;
1313     if (*nz) {
1314       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1315       for (i=0; i<M; i++) { /* for each block in the block row */
1316         idx_i = *idx + i*bs;
1317         itmp  = bs*aj[ai[bn] + i];
1318         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1319       }
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1327 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1339 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1345   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356   Mat            C;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359   PetscInt       *rows,*cols,bs2=a->bs2;
1360   MatScalar      *array;
1361 
1362   PetscFunctionBegin;
1363   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1365     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1366 
1367     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1369     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1370     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1371     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1372     ierr = PetscFree(col);CHKERRQ(ierr);
1373   } else {
1374     C = *B;
1375   }
1376 
1377   array = a->a;
1378   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1379   for (i=0; i<mbs; i++) {
1380     cols[0] = i*bs;
1381     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382     len = ai[i+1] - ai[i];
1383     for (j=0; j<len; j++) {
1384       rows[0] = (*aj++)*bs;
1385       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1387       array += bs2;
1388     }
1389   }
1390   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1391 
1392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1405 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1406 {
1407   PetscErrorCode ierr;
1408   Mat            Btrans;
1409 
1410   PetscFunctionBegin;
1411   *f   = PETSC_FALSE;
1412   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1413   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1414   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421 {
1422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1423   PetscErrorCode ierr;
1424   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425   int            fd;
1426   PetscScalar    *aa;
1427   FILE           *file;
1428 
1429   PetscFunctionBegin;
1430   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1431   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1432   col_lens[0] = MAT_FILE_CLASSID;
1433 
1434   col_lens[1] = A->rmap->N;
1435   col_lens[2] = A->cmap->n;
1436   col_lens[3] = a->nz*bs2;
1437 
1438   /* store lengths of each row and write (including header) to file */
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443     }
1444   }
1445   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1446   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1447 
1448   /* store column indices (zero start index) */
1449   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454         for (l=0; l<bs; l++) {
1455           jj[count++] = bs*a->j[k] + l;
1456         }
1457       }
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1461   ierr = PetscFree(jj);CHKERRQ(ierr);
1462 
1463   /* store nonzero values */
1464   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1465   count = 0;
1466   for (i=0; i<a->mbs; i++) {
1467     for (j=0; j<bs; j++) {
1468       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469         for (l=0; l<bs; l++) {
1470           aa[count++] = a->a[bs2*k + l*bs + j];
1471         }
1472       }
1473     }
1474   }
1475   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1476   ierr = PetscFree(aa);CHKERRQ(ierr);
1477 
1478   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1479   if (file) {
1480     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1487 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1488 {
1489   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1490   PetscErrorCode    ierr;
1491   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1492   PetscViewerFormat format;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1496   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1497     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1498   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1499     Mat aij;
1500     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1501     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1502     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1503   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1504       PetscFunctionReturn(0);
1505   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1506     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1507     for (i=0; i<a->mbs; i++) {
1508       for (j=0; j<bs; j++) {
1509         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1510         for (k=a->i[i]; k<a->i[i+1]; k++) {
1511           for (l=0; l<bs; l++) {
1512 #if defined(PETSC_USE_COMPLEX)
1513             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1514               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1515                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1516             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1517               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1518                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1519             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1521             }
1522 #else
1523             if (a->a[bs2*k + l*bs + j] != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1525             }
1526 #endif
1527           }
1528         }
1529         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1530       }
1531     }
1532     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1533   } else {
1534     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1535     for (i=0; i<a->mbs; i++) {
1536       for (j=0; j<bs; j++) {
1537         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1538         for (k=a->i[i]; k<a->i[i+1]; k++) {
1539           for (l=0; l<bs; l++) {
1540 #if defined(PETSC_USE_COMPLEX)
1541             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1543                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1544             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1546                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1547             } else {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1549             }
1550 #else
1551             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1552 #endif
1553           }
1554         }
1555         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1556       }
1557     }
1558     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1559   }
1560   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1561   PetscFunctionReturn(0);
1562 }
1563 
1564 #include <petscdraw.h>
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1567 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1568 {
1569   Mat               A = (Mat) Aa;
1570   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1571   PetscErrorCode    ierr;
1572   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1573   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1574   MatScalar         *aa;
1575   PetscViewer       viewer;
1576   PetscViewerFormat format;
1577 
1578   PetscFunctionBegin;
1579   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1580   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1581 
1582   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1583 
1584   /* loop over matrix elements drawing boxes */
1585 
1586   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1587     color = PETSC_DRAW_BLUE;
1588     for (i=0,row=0; i<mbs; i++,row+=bs) {
1589       for (j=a->i[i]; j<a->i[i+1]; j++) {
1590         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1591         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1592         aa  = a->a + j*bs2;
1593         for (k=0; k<bs; k++) {
1594           for (l=0; l<bs; l++) {
1595             if (PetscRealPart(*aa++) >=  0.) continue;
1596             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1597           }
1598         }
1599       }
1600     }
1601     color = PETSC_DRAW_CYAN;
1602     for (i=0,row=0; i<mbs; i++,row+=bs) {
1603       for (j=a->i[i]; j<a->i[i+1]; j++) {
1604         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1605         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1606         aa  = a->a + j*bs2;
1607         for (k=0; k<bs; k++) {
1608           for (l=0; l<bs; l++) {
1609             if (PetscRealPart(*aa++) != 0.) continue;
1610             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1611           }
1612         }
1613       }
1614     }
1615     color = PETSC_DRAW_RED;
1616     for (i=0,row=0; i<mbs; i++,row+=bs) {
1617       for (j=a->i[i]; j<a->i[i+1]; j++) {
1618         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1619         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1620         aa  = a->a + j*bs2;
1621         for (k=0; k<bs; k++) {
1622           for (l=0; l<bs; l++) {
1623             if (PetscRealPart(*aa++) <= 0.) continue;
1624             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1625           }
1626         }
1627       }
1628     }
1629   } else {
1630     /* use contour shading to indicate magnitude of values */
1631     /* first determine max of all nonzero values */
1632     PetscDraw popup;
1633     PetscReal scale,maxv = 0.0;
1634 
1635     for (i=0; i<a->nz*a->bs2; i++) {
1636       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1637     }
1638     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1639     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1640     if (popup) {
1641       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1642     }
1643     for (i=0,row=0; i<mbs; i++,row+=bs) {
1644       for (j=a->i[i]; j<a->i[i+1]; j++) {
1645         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1646         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1647         aa  = a->a + j*bs2;
1648         for (k=0; k<bs; k++) {
1649           for (l=0; l<bs; l++) {
1650             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1651             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1652           }
1653         }
1654       }
1655     }
1656   }
1657   PetscFunctionReturn(0);
1658 }
1659 
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1662 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1663 {
1664   PetscErrorCode ierr;
1665   PetscReal      xl,yl,xr,yr,w,h;
1666   PetscDraw      draw;
1667   PetscBool      isnull;
1668 
1669   PetscFunctionBegin;
1670   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1671   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1672 
1673   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1674   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1675   xr  += w;    yr += h;  xl = -w;     yl = -h;
1676   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1677   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1678   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 #undef __FUNCT__
1683 #define __FUNCT__ "MatView_SeqBAIJ"
1684 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1685 {
1686   PetscErrorCode ierr;
1687   PetscBool      iascii,isbinary,isdraw;
1688 
1689   PetscFunctionBegin;
1690   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1691   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1692   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1693   if (iascii) {
1694     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1695   } else if (isbinary) {
1696     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1697   } else if (isdraw) {
1698     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1699   } else {
1700     Mat B;
1701     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1702     ierr = MatView(B,viewer);CHKERRQ(ierr);
1703     ierr = MatDestroy(&B);CHKERRQ(ierr);
1704   }
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 
1709 #undef __FUNCT__
1710 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1711 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1712 {
1713   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1714   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1715   PetscInt    *ai = a->i,*ailen = a->ilen;
1716   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1717   MatScalar   *ap,*aa = a->a;
1718 
1719   PetscFunctionBegin;
1720   for (k=0; k<m; k++) { /* loop over rows */
1721     row = im[k]; brow = row/bs;
1722     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1723     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1724     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1725     nrow = ailen[brow];
1726     for (l=0; l<n; l++) { /* loop over columns */
1727       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1728       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1729       col  = in[l];
1730       bcol = col/bs;
1731       cidx = col%bs;
1732       ridx = row%bs;
1733       high = nrow;
1734       low  = 0; /* assume unsorted */
1735       while (high-low > 5) {
1736         t = (low+high)/2;
1737         if (rp[t] > bcol) high = t;
1738         else             low  = t;
1739       }
1740       for (i=low; i<high; i++) {
1741         if (rp[i] > bcol) break;
1742         if (rp[i] == bcol) {
1743           *v++ = ap[bs2*i+bs*cidx+ridx];
1744           goto finished;
1745         }
1746       }
1747       *v++ = 0.0;
1748 finished:;
1749     }
1750   }
1751   PetscFunctionReturn(0);
1752 }
1753 
1754 #undef __FUNCT__
1755 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1756 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1757 {
1758   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1759   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1760   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1761   PetscErrorCode    ierr;
1762   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1763   PetscBool         roworiented=a->roworiented;
1764   const PetscScalar *value     = v;
1765   MatScalar         *ap,*aa = a->a,*bap;
1766 
1767   PetscFunctionBegin;
1768   if (roworiented) {
1769     stepval = (n-1)*bs;
1770   } else {
1771     stepval = (m-1)*bs;
1772   }
1773   for (k=0; k<m; k++) { /* loop over added rows */
1774     row = im[k];
1775     if (row < 0) continue;
1776 #if defined(PETSC_USE_DEBUG)
1777     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1778 #endif
1779     rp   = aj + ai[row];
1780     ap   = aa + bs2*ai[row];
1781     rmax = imax[row];
1782     nrow = ailen[row];
1783     low  = 0;
1784     high = nrow;
1785     for (l=0; l<n; l++) { /* loop over added columns */
1786       if (in[l] < 0) continue;
1787 #if defined(PETSC_USE_DEBUG)
1788       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1789 #endif
1790       col = in[l];
1791       if (roworiented) {
1792         value = v + (k*(stepval+bs) + l)*bs;
1793       } else {
1794         value = v + (l*(stepval+bs) + k)*bs;
1795       }
1796       if (col <= lastcol) low = 0;
1797       else high = nrow;
1798       lastcol = col;
1799       while (high-low > 7) {
1800         t = (low+high)/2;
1801         if (rp[t] > col) high = t;
1802         else             low  = t;
1803       }
1804       for (i=low; i<high; i++) {
1805         if (rp[i] > col) break;
1806         if (rp[i] == col) {
1807           bap = ap +  bs2*i;
1808           if (roworiented) {
1809             if (is == ADD_VALUES) {
1810               for (ii=0; ii<bs; ii++,value+=stepval) {
1811                 for (jj=ii; jj<bs2; jj+=bs) {
1812                   bap[jj] += *value++;
1813                 }
1814               }
1815             } else {
1816               for (ii=0; ii<bs; ii++,value+=stepval) {
1817                 for (jj=ii; jj<bs2; jj+=bs) {
1818                   bap[jj] = *value++;
1819                 }
1820               }
1821             }
1822           } else {
1823             if (is == ADD_VALUES) {
1824               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1825                 for (jj=0; jj<bs; jj++) {
1826                   bap[jj] += value[jj];
1827                 }
1828                 bap += bs;
1829               }
1830             } else {
1831               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1832                 for (jj=0; jj<bs; jj++) {
1833                   bap[jj]  = value[jj];
1834                 }
1835                 bap += bs;
1836               }
1837             }
1838           }
1839           goto noinsert2;
1840         }
1841       }
1842       if (nonew == 1) goto noinsert2;
1843       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1844       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1845       N = nrow++ - 1; high++;
1846       /* shift up all the later entries in this row */
1847       for (ii=N; ii>=i; ii--) {
1848         rp[ii+1] = rp[ii];
1849         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1850       }
1851       if (N >= i) {
1852         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1853       }
1854       rp[i] = col;
1855       bap   = ap +  bs2*i;
1856       if (roworiented) {
1857         for (ii=0; ii<bs; ii++,value+=stepval) {
1858           for (jj=ii; jj<bs2; jj+=bs) {
1859             bap[jj] = *value++;
1860           }
1861         }
1862       } else {
1863         for (ii=0; ii<bs; ii++,value+=stepval) {
1864           for (jj=0; jj<bs; jj++) {
1865             *bap++ = *value++;
1866           }
1867         }
1868       }
1869 noinsert2:;
1870       low = i;
1871     }
1872     ailen[row] = nrow;
1873   }
1874   PetscFunctionReturn(0);
1875 }
1876 
1877 #undef __FUNCT__
1878 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1879 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1880 {
1881   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1882   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1883   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1884   PetscErrorCode ierr;
1885   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1886   MatScalar      *aa  = a->a,*ap;
1887   PetscReal      ratio=0.6;
1888 
1889   PetscFunctionBegin;
1890   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1891 
1892   if (m) rmax = ailen[0];
1893   for (i=1; i<mbs; i++) {
1894     /* move each row back by the amount of empty slots (fshift) before it*/
1895     fshift += imax[i-1] - ailen[i-1];
1896     rmax    = PetscMax(rmax,ailen[i]);
1897     if (fshift) {
1898       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1899       N  = ailen[i];
1900       for (j=0; j<N; j++) {
1901         ip[j-fshift] = ip[j];
1902 
1903         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1904       }
1905     }
1906     ai[i] = ai[i-1] + ailen[i-1];
1907   }
1908   if (mbs) {
1909     fshift += imax[mbs-1] - ailen[mbs-1];
1910     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1911   }
1912 
1913   /* reset ilen and imax for each row */
1914   a->nonzerorowcnt = 0;
1915   for (i=0; i<mbs; i++) {
1916     ailen[i] = imax[i] = ai[i+1] - ai[i];
1917     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1918   }
1919   a->nz = ai[mbs];
1920 
1921   /* diagonals may have moved, so kill the diagonal pointers */
1922   a->idiagvalid = PETSC_FALSE;
1923   if (fshift && a->diag) {
1924     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1925     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1926     a->diag = 0;
1927   }
1928   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1929   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1930   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1931   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1932 
1933   A->info.mallocs    += a->reallocs;
1934   a->reallocs         = 0;
1935   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1936   a->rmax             = rmax;
1937 
1938   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1939   PetscFunctionReturn(0);
1940 }
1941 
1942 /*
1943    This function returns an array of flags which indicate the locations of contiguous
1944    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1945    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1946    Assume: sizes should be long enough to hold all the values.
1947 */
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1950 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1951 {
1952   PetscInt  i,j,k,row;
1953   PetscBool flg;
1954 
1955   PetscFunctionBegin;
1956   for (i=0,j=0; i<n; j++) {
1957     row = idx[i];
1958     if (row%bs!=0) { /* Not the begining of a block */
1959       sizes[j] = 1;
1960       i++;
1961     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1962       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1963       i++;
1964     } else { /* Begining of the block, so check if the complete block exists */
1965       flg = PETSC_TRUE;
1966       for (k=1; k<bs; k++) {
1967         if (row+k != idx[i+k]) { /* break in the block */
1968           flg = PETSC_FALSE;
1969           break;
1970         }
1971       }
1972       if (flg) { /* No break in the bs */
1973         sizes[j] = bs;
1974         i       += bs;
1975       } else {
1976         sizes[j] = 1;
1977         i++;
1978       }
1979     }
1980   }
1981   *bs_max = j;
1982   PetscFunctionReturn(0);
1983 }
1984 
1985 #undef __FUNCT__
1986 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1987 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1988 {
1989   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1990   PetscErrorCode    ierr;
1991   PetscInt          i,j,k,count,*rows;
1992   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1993   PetscScalar       zero = 0.0;
1994   MatScalar         *aa;
1995   const PetscScalar *xx;
1996   PetscScalar       *bb;
1997 
1998   PetscFunctionBegin;
1999   /* fix right hand side if needed */
2000   if (x && b) {
2001     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2002     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2003     for (i=0; i<is_n; i++) {
2004       bb[is_idx[i]] = diag*xx[is_idx[i]];
2005     }
2006     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2007     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2008   }
2009 
2010   /* Make a copy of the IS and  sort it */
2011   /* allocate memory for rows,sizes */
2012   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2013 
2014   /* copy IS values to rows, and sort them */
2015   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2016   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2017 
2018   if (baij->keepnonzeropattern) {
2019     for (i=0; i<is_n; i++) sizes[i] = 1;
2020     bs_max          = is_n;
2021   } else {
2022     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2023     A->nonzerostate++;
2024   }
2025 
2026   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2027     row = rows[j];
2028     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2029     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2030     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2031     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2032       if (diag != (PetscScalar)0.0) {
2033         if (baij->ilen[row/bs] > 0) {
2034           baij->ilen[row/bs]       = 1;
2035           baij->j[baij->i[row/bs]] = row/bs;
2036 
2037           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2038         }
2039         /* Now insert all the diagonal values for this bs */
2040         for (k=0; k<bs; k++) {
2041           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2042         }
2043       } else { /* (diag == 0.0) */
2044         baij->ilen[row/bs] = 0;
2045       } /* end (diag == 0.0) */
2046     } else { /* (sizes[i] != bs) */
2047 #if defined(PETSC_USE_DEBUG)
2048       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2049 #endif
2050       for (k=0; k<count; k++) {
2051         aa[0] =  zero;
2052         aa   += bs;
2053       }
2054       if (diag != (PetscScalar)0.0) {
2055         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2056       }
2057     }
2058   }
2059 
2060   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2061   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2062   PetscFunctionReturn(0);
2063 }
2064 
2065 #undef __FUNCT__
2066 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2067 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2068 {
2069   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2070   PetscErrorCode    ierr;
2071   PetscInt          i,j,k,count;
2072   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2073   PetscScalar       zero = 0.0;
2074   MatScalar         *aa;
2075   const PetscScalar *xx;
2076   PetscScalar       *bb;
2077   PetscBool         *zeroed,vecs = PETSC_FALSE;
2078 
2079   PetscFunctionBegin;
2080   /* fix right hand side if needed */
2081   if (x && b) {
2082     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2083     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2084     vecs = PETSC_TRUE;
2085   }
2086 
2087   /* zero the columns */
2088   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2089   for (i=0; i<is_n; i++) {
2090     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2091     zeroed[is_idx[i]] = PETSC_TRUE;
2092   }
2093   for (i=0; i<A->rmap->N; i++) {
2094     if (!zeroed[i]) {
2095       row = i/bs;
2096       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2097         for (k=0; k<bs; k++) {
2098           col = bs*baij->j[j] + k;
2099           if (zeroed[col]) {
2100             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2101             if (vecs) bb[i] -= aa[0]*xx[col];
2102             aa[0] = 0.0;
2103           }
2104         }
2105       }
2106     } else if (vecs) bb[i] = diag*xx[i];
2107   }
2108   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2109   if (vecs) {
2110     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2112   }
2113 
2114   /* zero the rows */
2115   for (i=0; i<is_n; i++) {
2116     row   = is_idx[i];
2117     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2118     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2119     for (k=0; k<count; k++) {
2120       aa[0] =  zero;
2121       aa   += bs;
2122     }
2123     if (diag != (PetscScalar)0.0) {
2124       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2125     }
2126   }
2127   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2128   PetscFunctionReturn(0);
2129 }
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2133 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2134 {
2135   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2136   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2137   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2138   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2139   PetscErrorCode ierr;
2140   PetscInt       ridx,cidx,bs2=a->bs2;
2141   PetscBool      roworiented=a->roworiented;
2142   MatScalar      *ap,value,*aa=a->a,*bap;
2143 
2144   PetscFunctionBegin;
2145   for (k=0; k<m; k++) { /* loop over added rows */
2146     row  = im[k];
2147     brow = row/bs;
2148     if (row < 0) continue;
2149 #if defined(PETSC_USE_DEBUG)
2150     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2151 #endif
2152     rp   = aj + ai[brow];
2153     ap   = aa + bs2*ai[brow];
2154     rmax = imax[brow];
2155     nrow = ailen[brow];
2156     low  = 0;
2157     high = nrow;
2158     for (l=0; l<n; l++) { /* loop over added columns */
2159       if (in[l] < 0) continue;
2160 #if defined(PETSC_USE_DEBUG)
2161       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2162 #endif
2163       col  = in[l]; bcol = col/bs;
2164       ridx = row % bs; cidx = col % bs;
2165       if (roworiented) {
2166         value = v[l + k*n];
2167       } else {
2168         value = v[k + l*m];
2169       }
2170       if (col <= lastcol) low = 0; else high = nrow;
2171       lastcol = col;
2172       while (high-low > 7) {
2173         t = (low+high)/2;
2174         if (rp[t] > bcol) high = t;
2175         else              low  = t;
2176       }
2177       for (i=low; i<high; i++) {
2178         if (rp[i] > bcol) break;
2179         if (rp[i] == bcol) {
2180           bap = ap +  bs2*i + bs*cidx + ridx;
2181           if (is == ADD_VALUES) *bap += value;
2182           else                  *bap  = value;
2183           goto noinsert1;
2184         }
2185       }
2186       if (nonew == 1) goto noinsert1;
2187       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2188       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2189       N = nrow++ - 1; high++;
2190       /* shift up all the later entries in this row */
2191       for (ii=N; ii>=i; ii--) {
2192         rp[ii+1] = rp[ii];
2193         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2194       }
2195       if (N>=i) {
2196         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2197       }
2198       rp[i]                      = bcol;
2199       ap[bs2*i + bs*cidx + ridx] = value;
2200       a->nz++;
2201       A->nonzerostate++;
2202 noinsert1:;
2203       low = i;
2204     }
2205     ailen[brow] = nrow;
2206   }
2207   PetscFunctionReturn(0);
2208 }
2209 
2210 #undef __FUNCT__
2211 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2212 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2213 {
2214   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2215   Mat            outA;
2216   PetscErrorCode ierr;
2217   PetscBool      row_identity,col_identity;
2218 
2219   PetscFunctionBegin;
2220   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2221   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2222   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2223   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2224 
2225   outA            = inA;
2226   inA->factortype = MAT_FACTOR_LU;
2227 
2228   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2229 
2230   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2231   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2232   a->row = row;
2233   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2234   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2235   a->col = col;
2236 
2237   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2238   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2239   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2240   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2241 
2242   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2243   if (!a->solve_work) {
2244     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2245     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2246   }
2247   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2248   PetscFunctionReturn(0);
2249 }
2250 
2251 #undef __FUNCT__
2252 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2253 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2254 {
2255   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2256   PetscInt    i,nz,mbs;
2257 
2258   PetscFunctionBegin;
2259   nz  = baij->maxnz;
2260   mbs = baij->mbs;
2261   for (i=0; i<nz; i++) {
2262     baij->j[i] = indices[i];
2263   }
2264   baij->nz = nz;
2265   for (i=0; i<mbs; i++) {
2266     baij->ilen[i] = baij->imax[i];
2267   }
2268   PetscFunctionReturn(0);
2269 }
2270 
2271 #undef __FUNCT__
2272 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2273 /*@
2274     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2275        in the matrix.
2276 
2277   Input Parameters:
2278 +  mat - the SeqBAIJ matrix
2279 -  indices - the column indices
2280 
2281   Level: advanced
2282 
2283   Notes:
2284     This can be called if you have precomputed the nonzero structure of the
2285   matrix and want to provide it to the matrix object to improve the performance
2286   of the MatSetValues() operation.
2287 
2288     You MUST have set the correct numbers of nonzeros per row in the call to
2289   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2290 
2291     MUST be called before any calls to MatSetValues();
2292 
2293 @*/
2294 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2295 {
2296   PetscErrorCode ierr;
2297 
2298   PetscFunctionBegin;
2299   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2300   PetscValidPointer(indices,2);
2301   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 #undef __FUNCT__
2306 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2307 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2308 {
2309   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2310   PetscErrorCode ierr;
2311   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2312   PetscReal      atmp;
2313   PetscScalar    *x,zero = 0.0;
2314   MatScalar      *aa;
2315   PetscInt       ncols,brow,krow,kcol;
2316 
2317   PetscFunctionBegin;
2318   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2319   bs  = A->rmap->bs;
2320   aa  = a->a;
2321   ai  = a->i;
2322   aj  = a->j;
2323   mbs = a->mbs;
2324 
2325   ierr = VecSet(v,zero);CHKERRQ(ierr);
2326   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2327   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2328   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2329   for (i=0; i<mbs; i++) {
2330     ncols = ai[1] - ai[0]; ai++;
2331     brow  = bs*i;
2332     for (j=0; j<ncols; j++) {
2333       for (kcol=0; kcol<bs; kcol++) {
2334         for (krow=0; krow<bs; krow++) {
2335           atmp = PetscAbsScalar(*aa);aa++;
2336           row  = brow + krow;   /* row index */
2337           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2338         }
2339       }
2340       aj++;
2341     }
2342   }
2343   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2344   PetscFunctionReturn(0);
2345 }
2346 
2347 #undef __FUNCT__
2348 #define __FUNCT__ "MatCopy_SeqBAIJ"
2349 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2350 {
2351   PetscErrorCode ierr;
2352 
2353   PetscFunctionBegin;
2354   /* If the two matrices have the same copy implementation, use fast copy. */
2355   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2356     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2357     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2358     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2359 
2360     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2361     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2362     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2363   } else {
2364     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2371 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2372 {
2373   PetscErrorCode ierr;
2374 
2375   PetscFunctionBegin;
2376   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2377   PetscFunctionReturn(0);
2378 }
2379 
2380 #undef __FUNCT__
2381 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2382 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2383 {
2384   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2385 
2386   PetscFunctionBegin;
2387   *array = a->a;
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2393 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2394 {
2395   PetscFunctionBegin;
2396   PetscFunctionReturn(0);
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2401 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2402 {
2403   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2404   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2405   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2406   PetscErrorCode ierr;
2407 
2408   PetscFunctionBegin;
2409   /* Set the number of nonzeros in the new matrix */
2410   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2411   PetscFunctionReturn(0);
2412 }
2413 
2414 #undef __FUNCT__
2415 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2416 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2417 {
2418   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2419   PetscErrorCode ierr;
2420   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2421   PetscBLASInt   one=1;
2422 
2423   PetscFunctionBegin;
2424   if (str == SAME_NONZERO_PATTERN) {
2425     PetscScalar  alpha = a;
2426     PetscBLASInt bnz;
2427     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2428     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2429     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2430   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2431     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2432   } else {
2433     Mat      B;
2434     PetscInt *nnz;
2435     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2436     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2437     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2438     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2439     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2440     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2441     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2442     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2443     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2444     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2445     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2446     ierr = PetscFree(nnz);CHKERRQ(ierr);
2447   }
2448   PetscFunctionReturn(0);
2449 }
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2453 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2454 {
2455   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2456   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2457   MatScalar   *aa = a->a;
2458 
2459   PetscFunctionBegin;
2460   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2461   PetscFunctionReturn(0);
2462 }
2463 
2464 #undef __FUNCT__
2465 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2467 {
2468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2469   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2470   MatScalar   *aa = a->a;
2471 
2472   PetscFunctionBegin;
2473   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2479 /*
2480     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2481 */
2482 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2483 {
2484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2485   PetscErrorCode ierr;
2486   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2487   PetscInt       nz = a->i[m],row,*jj,mr,col;
2488 
2489   PetscFunctionBegin;
2490   *nn = n;
2491   if (!ia) PetscFunctionReturn(0);
2492   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2493   else {
2494     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2495     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2496     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2497     jj   = a->j;
2498     for (i=0; i<nz; i++) {
2499       collengths[jj[i]]++;
2500     }
2501     cia[0] = oshift;
2502     for (i=0; i<n; i++) {
2503       cia[i+1] = cia[i] + collengths[i];
2504     }
2505     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2506     jj   = a->j;
2507     for (row=0; row<m; row++) {
2508       mr = a->i[row+1] - a->i[row];
2509       for (i=0; i<mr; i++) {
2510         col = *jj++;
2511 
2512         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2513       }
2514     }
2515     ierr = PetscFree(collengths);CHKERRQ(ierr);
2516     *ia  = cia; *ja = cja;
2517   }
2518   PetscFunctionReturn(0);
2519 }
2520 
2521 #undef __FUNCT__
2522 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2523 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2524 {
2525   PetscErrorCode ierr;
2526 
2527   PetscFunctionBegin;
2528   if (!ia) PetscFunctionReturn(0);
2529   ierr = PetscFree(*ia);CHKERRQ(ierr);
2530   ierr = PetscFree(*ja);CHKERRQ(ierr);
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 /*
2535  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2536  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2537  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2538  */
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2541 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2542 {
2543   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2544   PetscErrorCode ierr;
2545   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2546   PetscInt       nz = a->i[m],row,*jj,mr,col;
2547   PetscInt       *cspidx;
2548 
2549   PetscFunctionBegin;
2550   *nn = n;
2551   if (!ia) PetscFunctionReturn(0);
2552 
2553   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2554   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2555   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2556   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2557   jj   = a->j;
2558   for (i=0; i<nz; i++) {
2559     collengths[jj[i]]++;
2560   }
2561   cia[0] = oshift;
2562   for (i=0; i<n; i++) {
2563     cia[i+1] = cia[i] + collengths[i];
2564   }
2565   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2566   jj   = a->j;
2567   for (row=0; row<m; row++) {
2568     mr = a->i[row+1] - a->i[row];
2569     for (i=0; i<mr; i++) {
2570       col = *jj++;
2571       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2572       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2573     }
2574   }
2575   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2576   *ia    = cia; *ja = cja;
2577   *spidx = cspidx;
2578   PetscFunctionReturn(0);
2579 }
2580 
2581 #undef __FUNCT__
2582 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2583 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2584 {
2585   PetscErrorCode ierr;
2586 
2587   PetscFunctionBegin;
2588   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2589   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2590   PetscFunctionReturn(0);
2591 }
2592 
2593 /* -------------------------------------------------------------------*/
2594 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2595                                        MatGetRow_SeqBAIJ,
2596                                        MatRestoreRow_SeqBAIJ,
2597                                        MatMult_SeqBAIJ_N,
2598                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2599                                        MatMultTranspose_SeqBAIJ,
2600                                        MatMultTransposeAdd_SeqBAIJ,
2601                                        0,
2602                                        0,
2603                                        0,
2604                                /* 10*/ 0,
2605                                        MatLUFactor_SeqBAIJ,
2606                                        0,
2607                                        0,
2608                                        MatTranspose_SeqBAIJ,
2609                                /* 15*/ MatGetInfo_SeqBAIJ,
2610                                        MatEqual_SeqBAIJ,
2611                                        MatGetDiagonal_SeqBAIJ,
2612                                        MatDiagonalScale_SeqBAIJ,
2613                                        MatNorm_SeqBAIJ,
2614                                /* 20*/ 0,
2615                                        MatAssemblyEnd_SeqBAIJ,
2616                                        MatSetOption_SeqBAIJ,
2617                                        MatZeroEntries_SeqBAIJ,
2618                                /* 24*/ MatZeroRows_SeqBAIJ,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                        0,
2623                                /* 29*/ MatSetUp_SeqBAIJ,
2624                                        0,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                /* 34*/ MatDuplicate_SeqBAIJ,
2629                                        0,
2630                                        0,
2631                                        MatILUFactor_SeqBAIJ,
2632                                        0,
2633                                /* 39*/ MatAXPY_SeqBAIJ,
2634                                        MatGetSubMatrices_SeqBAIJ,
2635                                        MatIncreaseOverlap_SeqBAIJ,
2636                                        MatGetValues_SeqBAIJ,
2637                                        MatCopy_SeqBAIJ,
2638                                /* 44*/ 0,
2639                                        MatScale_SeqBAIJ,
2640                                        0,
2641                                        0,
2642                                        MatZeroRowsColumns_SeqBAIJ,
2643                                /* 49*/ 0,
2644                                        MatGetRowIJ_SeqBAIJ,
2645                                        MatRestoreRowIJ_SeqBAIJ,
2646                                        MatGetColumnIJ_SeqBAIJ,
2647                                        MatRestoreColumnIJ_SeqBAIJ,
2648                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2649                                        0,
2650                                        0,
2651                                        0,
2652                                        MatSetValuesBlocked_SeqBAIJ,
2653                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2654                                        MatDestroy_SeqBAIJ,
2655                                        MatView_SeqBAIJ,
2656                                        0,
2657                                        0,
2658                                /* 64*/ 0,
2659                                        0,
2660                                        0,
2661                                        0,
2662                                        0,
2663                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2664                                        0,
2665                                        MatConvert_Basic,
2666                                        0,
2667                                        0,
2668                                /* 74*/ 0,
2669                                        MatFDColoringApply_BAIJ,
2670                                        0,
2671                                        0,
2672                                        0,
2673                                /* 79*/ 0,
2674                                        0,
2675                                        0,
2676                                        0,
2677                                        MatLoad_SeqBAIJ,
2678                                /* 84*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /* 89*/ 0,
2684                                        0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                /* 94*/ 0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 99*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                /*104*/ 0,
2699                                        MatRealPart_SeqBAIJ,
2700                                        MatImaginaryPart_SeqBAIJ,
2701                                        0,
2702                                        0,
2703                                /*109*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        MatMissingDiagonal_SeqBAIJ,
2708                                /*114*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /*119*/ 0,
2714                                        0,
2715                                        MatMultHermitianTranspose_SeqBAIJ,
2716                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2717                                        0,
2718                                /*124*/ 0,
2719                                        0,
2720                                        MatInvertBlockDiagonal_SeqBAIJ,
2721                                        0,
2722                                        0,
2723                                /*129*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        0,
2728                                /*134*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*139*/ 0,
2734                                        0,
2735                                        0,
2736                                        MatFDColoringSetUp_SeqXAIJ,
2737                                        0,
2738                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2739 };
2740 
2741 #undef __FUNCT__
2742 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2743 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2744 {
2745   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2746   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2747   PetscErrorCode ierr;
2748 
2749   PetscFunctionBegin;
2750   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2751 
2752   /* allocate space for values if not already there */
2753   if (!aij->saved_values) {
2754     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2755     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2756   }
2757 
2758   /* copy values over */
2759   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 #undef __FUNCT__
2764 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2765 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2766 {
2767   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2768   PetscErrorCode ierr;
2769   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2770 
2771   PetscFunctionBegin;
2772   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2773   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2774 
2775   /* copy values over */
2776   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2777   PetscFunctionReturn(0);
2778 }
2779 
2780 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2781 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2782 
2783 #undef __FUNCT__
2784 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2785 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2786 {
2787   Mat_SeqBAIJ    *b;
2788   PetscErrorCode ierr;
2789   PetscInt       i,mbs,nbs,bs2;
2790   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2791 
2792   PetscFunctionBegin;
2793   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2794   if (nz == MAT_SKIP_ALLOCATION) {
2795     skipallocation = PETSC_TRUE;
2796     nz             = 0;
2797   }
2798 
2799   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2800   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2803 
2804   B->preallocated = PETSC_TRUE;
2805 
2806   mbs = B->rmap->n/bs;
2807   nbs = B->cmap->n/bs;
2808   bs2 = bs*bs;
2809 
2810   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2811 
2812   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2813   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2814   if (nnz) {
2815     for (i=0; i<mbs; i++) {
2816       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2817       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2818     }
2819   }
2820 
2821   b    = (Mat_SeqBAIJ*)B->data;
2822   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2823   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2824   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2825 
2826   if (!flg) {
2827     switch (bs) {
2828     case 1:
2829       B->ops->mult    = MatMult_SeqBAIJ_1;
2830       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2831       break;
2832     case 2:
2833       B->ops->mult    = MatMult_SeqBAIJ_2;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2835       break;
2836     case 3:
2837       B->ops->mult    = MatMult_SeqBAIJ_3;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2839       break;
2840     case 4:
2841       B->ops->mult    = MatMult_SeqBAIJ_4;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2843       break;
2844     case 5:
2845       B->ops->mult    = MatMult_SeqBAIJ_5;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2847       break;
2848     case 6:
2849       B->ops->mult    = MatMult_SeqBAIJ_6;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2851       break;
2852     case 7:
2853       B->ops->mult    = MatMult_SeqBAIJ_7;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2855       break;
2856     case 15:
2857       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2859       break;
2860     default:
2861       B->ops->mult    = MatMult_SeqBAIJ_N;
2862       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2863       break;
2864     }
2865   }
2866   B->ops->sor = MatSOR_SeqBAIJ;
2867   b->mbs = mbs;
2868   b->nbs = nbs;
2869   if (!skipallocation) {
2870     if (!b->imax) {
2871       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2872       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2873 
2874       b->free_imax_ilen = PETSC_TRUE;
2875     }
2876     /* b->ilen will count nonzeros in each block row so far. */
2877     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2878     if (!nnz) {
2879       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2880       else if (nz < 0) nz = 1;
2881       for (i=0; i<mbs; i++) b->imax[i] = nz;
2882       nz = nz*mbs;
2883     } else {
2884       nz = 0;
2885       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2886     }
2887 
2888     /* allocate the matrix space */
2889     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2890     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2891     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2892     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2893     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2894 
2895     b->singlemalloc = PETSC_TRUE;
2896     b->i[0]         = 0;
2897     for (i=1; i<mbs+1; i++) {
2898       b->i[i] = b->i[i-1] + b->imax[i-1];
2899     }
2900     b->free_a  = PETSC_TRUE;
2901     b->free_ij = PETSC_TRUE;
2902 #if defined(PETSC_THREADCOMM_ACTIVE)
2903     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2904 #endif
2905   } else {
2906     b->free_a  = PETSC_FALSE;
2907     b->free_ij = PETSC_FALSE;
2908   }
2909 
2910   b->bs2              = bs2;
2911   b->mbs              = mbs;
2912   b->nz               = 0;
2913   b->maxnz            = nz;
2914   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2915   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2916   PetscFunctionReturn(0);
2917 }
2918 
2919 #undef __FUNCT__
2920 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2921 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2922 {
2923   PetscInt       i,m,nz,nz_max=0,*nnz;
2924   PetscScalar    *values=0;
2925   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2926   PetscErrorCode ierr;
2927 
2928   PetscFunctionBegin;
2929   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2930   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2931   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2932   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2933   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2934   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2935   m    = B->rmap->n/bs;
2936 
2937   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2938   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2939   for (i=0; i<m; i++) {
2940     nz = ii[i+1]- ii[i];
2941     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2942     nz_max = PetscMax(nz_max, nz);
2943     nnz[i] = nz;
2944   }
2945   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2946   ierr = PetscFree(nnz);CHKERRQ(ierr);
2947 
2948   values = (PetscScalar*)V;
2949   if (!values) {
2950     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2951   }
2952   for (i=0; i<m; i++) {
2953     PetscInt          ncols  = ii[i+1] - ii[i];
2954     const PetscInt    *icols = jj + ii[i];
2955     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2956     if (!roworiented) {
2957       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2958     } else {
2959       PetscInt j;
2960       for (j=0; j<ncols; j++) {
2961         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2962         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2963       }
2964     }
2965   }
2966   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2967   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2968   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2969   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2970   PetscFunctionReturn(0);
2971 }
2972 
2973 /*MC
2974    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2975    block sparse compressed row format.
2976 
2977    Options Database Keys:
2978 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2979 
2980   Level: beginner
2981 
2982 .seealso: MatCreateSeqBAIJ()
2983 M*/
2984 
2985 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2986 
2987 #undef __FUNCT__
2988 #define __FUNCT__ "MatCreate_SeqBAIJ"
2989 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2990 {
2991   PetscErrorCode ierr;
2992   PetscMPIInt    size;
2993   Mat_SeqBAIJ    *b;
2994 
2995   PetscFunctionBegin;
2996   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2997   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2998 
2999   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3000   B->data = (void*)b;
3001   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3002 
3003   b->row          = 0;
3004   b->col          = 0;
3005   b->icol         = 0;
3006   b->reallocs     = 0;
3007   b->saved_values = 0;
3008 
3009   b->roworiented        = PETSC_TRUE;
3010   b->nonew              = 0;
3011   b->diag               = 0;
3012   B->spptr              = 0;
3013   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3014   b->keepnonzeropattern = PETSC_FALSE;
3015 
3016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3032 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3033 {
3034   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3035   PetscErrorCode ierr;
3036   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3037 
3038   PetscFunctionBegin;
3039   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3040 
3041   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3042     c->imax           = a->imax;
3043     c->ilen           = a->ilen;
3044     c->free_imax_ilen = PETSC_FALSE;
3045   } else {
3046     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3047     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3048     for (i=0; i<mbs; i++) {
3049       c->imax[i] = a->imax[i];
3050       c->ilen[i] = a->ilen[i];
3051     }
3052     c->free_imax_ilen = PETSC_TRUE;
3053   }
3054 
3055   /* allocate the matrix space */
3056   if (mallocmatspace) {
3057     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3058       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3059       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3060 
3061       c->i            = a->i;
3062       c->j            = a->j;
3063       c->singlemalloc = PETSC_FALSE;
3064       c->free_a       = PETSC_TRUE;
3065       c->free_ij      = PETSC_FALSE;
3066       c->parent       = A;
3067       C->preallocated = PETSC_TRUE;
3068       C->assembled    = PETSC_TRUE;
3069 
3070       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3071       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3072       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3073     } else {
3074       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3075       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3076 
3077       c->singlemalloc = PETSC_TRUE;
3078       c->free_a       = PETSC_TRUE;
3079       c->free_ij      = PETSC_TRUE;
3080 
3081       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3082       if (mbs > 0) {
3083         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3084         if (cpvalues == MAT_COPY_VALUES) {
3085           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3086         } else {
3087           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3088         }
3089       }
3090       C->preallocated = PETSC_TRUE;
3091       C->assembled    = PETSC_TRUE;
3092     }
3093   }
3094 
3095   c->roworiented = a->roworiented;
3096   c->nonew       = a->nonew;
3097 
3098   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3099   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3100 
3101   c->bs2         = a->bs2;
3102   c->mbs         = a->mbs;
3103   c->nbs         = a->nbs;
3104 
3105   if (a->diag) {
3106     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3107       c->diag      = a->diag;
3108       c->free_diag = PETSC_FALSE;
3109     } else {
3110       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3111       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3112       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3113       c->free_diag = PETSC_TRUE;
3114     }
3115   } else c->diag = 0;
3116 
3117   c->nz         = a->nz;
3118   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3119   c->solve_work = NULL;
3120   c->mult_work  = NULL;
3121   c->sor_workt  = NULL;
3122   c->sor_work   = NULL;
3123 
3124   c->compressedrow.use   = a->compressedrow.use;
3125   c->compressedrow.nrows = a->compressedrow.nrows;
3126   if (a->compressedrow.use) {
3127     i    = a->compressedrow.nrows;
3128     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3129     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3130     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3131     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3132   } else {
3133     c->compressedrow.use    = PETSC_FALSE;
3134     c->compressedrow.i      = NULL;
3135     c->compressedrow.rindex = NULL;
3136   }
3137   C->nonzerostate = A->nonzerostate;
3138 
3139   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3140   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3141   PetscFunctionReturn(0);
3142 }
3143 
3144 #undef __FUNCT__
3145 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3146 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3147 {
3148   PetscErrorCode ierr;
3149 
3150   PetscFunctionBegin;
3151   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3152   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3153   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3154   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3155   PetscFunctionReturn(0);
3156 }
3157 
3158 #undef __FUNCT__
3159 #define __FUNCT__ "MatLoad_SeqBAIJ"
3160 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3161 {
3162   Mat_SeqBAIJ    *a;
3163   PetscErrorCode ierr;
3164   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3165   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3166   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3167   PetscInt       *masked,nmask,tmp,bs2,ishift;
3168   PetscMPIInt    size;
3169   int            fd;
3170   PetscScalar    *aa;
3171   MPI_Comm       comm;
3172 
3173   PetscFunctionBegin;
3174   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3175   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3176   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3177   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3178   if (bs < 0) bs = 1;
3179   bs2  = bs*bs;
3180 
3181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3182   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3183   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3184   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3185   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3186   M = header[1]; N = header[2]; nz = header[3];
3187 
3188   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3189   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3190 
3191   /*
3192      This code adds extra rows to make sure the number of rows is
3193     divisible by the blocksize
3194   */
3195   mbs        = M/bs;
3196   extra_rows = bs - M + bs*(mbs);
3197   if (extra_rows == bs) extra_rows = 0;
3198   else mbs++;
3199   if (extra_rows) {
3200     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3201   }
3202 
3203   /* Set global sizes if not already set */
3204   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3205     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3206   } else { /* Check if the matrix global sizes are correct */
3207     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3208     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3209       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3210     }
3211     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3212   }
3213 
3214   /* read in row lengths */
3215   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3216   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3217   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3218 
3219   /* read in column indices */
3220   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3221   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3222   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3223 
3224   /* loop over row lengths determining block row lengths */
3225   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3226   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3227   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3228   rowcount = 0;
3229   nzcount  = 0;
3230   for (i=0; i<mbs; i++) {
3231     nmask = 0;
3232     for (j=0; j<bs; j++) {
3233       kmax = rowlengths[rowcount];
3234       for (k=0; k<kmax; k++) {
3235         tmp = jj[nzcount++]/bs;
3236         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3237       }
3238       rowcount++;
3239     }
3240     browlengths[i] += nmask;
3241     /* zero out the mask elements we set */
3242     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3243   }
3244 
3245   /* Do preallocation  */
3246   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3247   a    = (Mat_SeqBAIJ*)newmat->data;
3248 
3249   /* set matrix "i" values */
3250   a->i[0] = 0;
3251   for (i=1; i<= mbs; i++) {
3252     a->i[i]      = a->i[i-1] + browlengths[i-1];
3253     a->ilen[i-1] = browlengths[i-1];
3254   }
3255   a->nz = 0;
3256   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3257 
3258   /* read in nonzero values */
3259   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3260   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3261   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3262 
3263   /* set "a" and "j" values into matrix */
3264   nzcount = 0; jcount = 0;
3265   for (i=0; i<mbs; i++) {
3266     nzcountb = nzcount;
3267     nmask    = 0;
3268     for (j=0; j<bs; j++) {
3269       kmax = rowlengths[i*bs+j];
3270       for (k=0; k<kmax; k++) {
3271         tmp = jj[nzcount++]/bs;
3272         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3273       }
3274     }
3275     /* sort the masked values */
3276     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3277 
3278     /* set "j" values into matrix */
3279     maskcount = 1;
3280     for (j=0; j<nmask; j++) {
3281       a->j[jcount++]  = masked[j];
3282       mask[masked[j]] = maskcount++;
3283     }
3284     /* set "a" values into matrix */
3285     ishift = bs2*a->i[i];
3286     for (j=0; j<bs; j++) {
3287       kmax = rowlengths[i*bs+j];
3288       for (k=0; k<kmax; k++) {
3289         tmp       = jj[nzcountb]/bs;
3290         block     = mask[tmp] - 1;
3291         point     = jj[nzcountb] - bs*tmp;
3292         idx       = ishift + bs2*block + j + bs*point;
3293         a->a[idx] = (MatScalar)aa[nzcountb++];
3294       }
3295     }
3296     /* zero out the mask elements we set */
3297     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3298   }
3299   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3300 
3301   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3302   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3303   ierr = PetscFree(aa);CHKERRQ(ierr);
3304   ierr = PetscFree(jj);CHKERRQ(ierr);
3305   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3306 
3307   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3308   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3309   PetscFunctionReturn(0);
3310 }
3311 
3312 #undef __FUNCT__
3313 #define __FUNCT__ "MatCreateSeqBAIJ"
3314 /*@C
3315    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3316    compressed row) format.  For good matrix assembly performance the
3317    user should preallocate the matrix storage by setting the parameter nz
3318    (or the array nnz).  By setting these parameters accurately, performance
3319    during matrix assembly can be increased by more than a factor of 50.
3320 
3321    Collective on MPI_Comm
3322 
3323    Input Parameters:
3324 +  comm - MPI communicator, set to PETSC_COMM_SELF
3325 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3326           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3327 .  m - number of rows
3328 .  n - number of columns
3329 .  nz - number of nonzero blocks  per block row (same for all rows)
3330 -  nnz - array containing the number of nonzero blocks in the various block rows
3331          (possibly different for each block row) or NULL
3332 
3333    Output Parameter:
3334 .  A - the matrix
3335 
3336    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3337    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3338    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3339 
3340    Options Database Keys:
3341 .   -mat_no_unroll - uses code that does not unroll the loops in the
3342                      block calculations (much slower)
3343 .    -mat_block_size - size of the blocks to use
3344 
3345    Level: intermediate
3346 
3347    Notes:
3348    The number of rows and columns must be divisible by blocksize.
3349 
3350    If the nnz parameter is given then the nz parameter is ignored
3351 
3352    A nonzero block is any block that as 1 or more nonzeros in it
3353 
3354    The block AIJ format is fully compatible with standard Fortran 77
3355    storage.  That is, the stored row and column indices can begin at
3356    either one (as in Fortran) or zero.  See the users' manual for details.
3357 
3358    Specify the preallocated storage with either nz or nnz (not both).
3359    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3360    allocation.  See Users-Manual: ch_mat for details.
3361    matrices.
3362 
3363 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3364 @*/
3365 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3366 {
3367   PetscErrorCode ierr;
3368 
3369   PetscFunctionBegin;
3370   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3371   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3372   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3373   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3374   PetscFunctionReturn(0);
3375 }
3376 
3377 #undef __FUNCT__
3378 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3379 /*@C
3380    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3381    per row in the matrix. For good matrix assembly performance the
3382    user should preallocate the matrix storage by setting the parameter nz
3383    (or the array nnz).  By setting these parameters accurately, performance
3384    during matrix assembly can be increased by more than a factor of 50.
3385 
3386    Collective on MPI_Comm
3387 
3388    Input Parameters:
3389 +  B - the matrix
3390 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3391           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3392 .  nz - number of block nonzeros per block row (same for all rows)
3393 -  nnz - array containing the number of block nonzeros in the various block rows
3394          (possibly different for each block row) or NULL
3395 
3396    Options Database Keys:
3397 .   -mat_no_unroll - uses code that does not unroll the loops in the
3398                      block calculations (much slower)
3399 .    -mat_block_size - size of the blocks to use
3400 
3401    Level: intermediate
3402 
3403    Notes:
3404    If the nnz parameter is given then the nz parameter is ignored
3405 
3406    You can call MatGetInfo() to get information on how effective the preallocation was;
3407    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3408    You can also run with the option -info and look for messages with the string
3409    malloc in them to see if additional memory allocation was needed.
3410 
3411    The block AIJ format is fully compatible with standard Fortran 77
3412    storage.  That is, the stored row and column indices can begin at
3413    either one (as in Fortran) or zero.  See the users' manual for details.
3414 
3415    Specify the preallocated storage with either nz or nnz (not both).
3416    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3417    allocation.  See Users-Manual: ch_mat for details.
3418 
3419 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3420 @*/
3421 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3422 {
3423   PetscErrorCode ierr;
3424 
3425   PetscFunctionBegin;
3426   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3427   PetscValidType(B,1);
3428   PetscValidLogicalCollectiveInt(B,bs,2);
3429   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3430   PetscFunctionReturn(0);
3431 }
3432 
3433 #undef __FUNCT__
3434 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3435 /*@C
3436    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3437    (the default sequential PETSc format).
3438 
3439    Collective on MPI_Comm
3440 
3441    Input Parameters:
3442 +  B - the matrix
3443 .  i - the indices into j for the start of each local row (starts with zero)
3444 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3445 -  v - optional values in the matrix
3446 
3447    Level: developer
3448 
3449    Notes:
3450    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3451    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3452    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3453    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3454    block column and the second index is over columns within a block.
3455 
3456 .keywords: matrix, aij, compressed row, sparse
3457 
3458 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3459 @*/
3460 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3461 {
3462   PetscErrorCode ierr;
3463 
3464   PetscFunctionBegin;
3465   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3466   PetscValidType(B,1);
3467   PetscValidLogicalCollectiveInt(B,bs,2);
3468   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3469   PetscFunctionReturn(0);
3470 }
3471 
3472 
3473 #undef __FUNCT__
3474 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3475 /*@
3476      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3477 
3478      Collective on MPI_Comm
3479 
3480    Input Parameters:
3481 +  comm - must be an MPI communicator of size 1
3482 .  bs - size of block
3483 .  m - number of rows
3484 .  n - number of columns
3485 .  i - row indices
3486 .  j - column indices
3487 -  a - matrix values
3488 
3489    Output Parameter:
3490 .  mat - the matrix
3491 
3492    Level: advanced
3493 
3494    Notes:
3495        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3496     once the matrix is destroyed
3497 
3498        You cannot set new nonzero locations into this matrix, that will generate an error.
3499 
3500        The i and j indices are 0 based
3501 
3502        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3503 
3504       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3505       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3506       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3507       with column-major ordering within blocks.
3508 
3509 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3510 
3511 @*/
3512 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3513 {
3514   PetscErrorCode ierr;
3515   PetscInt       ii;
3516   Mat_SeqBAIJ    *baij;
3517 
3518   PetscFunctionBegin;
3519   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3520   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3521 
3522   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3523   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3524   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3525   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3526   baij = (Mat_SeqBAIJ*)(*mat)->data;
3527   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3528   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3529 
3530   baij->i = i;
3531   baij->j = j;
3532   baij->a = a;
3533 
3534   baij->singlemalloc = PETSC_FALSE;
3535   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3536   baij->free_a       = PETSC_FALSE;
3537   baij->free_ij      = PETSC_FALSE;
3538 
3539   for (ii=0; ii<m; ii++) {
3540     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3541 #if defined(PETSC_USE_DEBUG)
3542     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3543 #endif
3544   }
3545 #if defined(PETSC_USE_DEBUG)
3546   for (ii=0; ii<baij->i[m]; ii++) {
3547     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3548     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3549   }
3550 #endif
3551 
3552   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3553   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3554   PetscFunctionReturn(0);
3555 }
3556 
3557 #undef __FUNCT__
3558 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3559 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3560 {
3561   PetscErrorCode ierr;
3562 
3563   PetscFunctionBegin;
3564   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3565   PetscFunctionReturn(0);
3566 }
3567