xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 8895d3f2aaf765dc4e8d62d73f37241d00bd6bbd)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
156   }
157   if (!a->sor_workt) {
158     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
159   }
160   if (!a->sor_work) {
161     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
162   }
163   work = a->mult_work;
164   t    = a->sor_workt;
165   w    = a->sor_work;
166 
167   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
168   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
169 
170   if (flag & SOR_ZERO_INITIAL_GUESS) {
171     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
172       switch (bs) {
173       case 1:
174         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
175         t[0] = b[0];
176         i2     = 1;
177         idiag += 1;
178         for (i=1; i<m; i++) {
179           v  = aa + ai[i];
180           vi = aj + ai[i];
181           nz = diag[i] - ai[i];
182           s[0] = b[i2];
183           for (j=0; j<nz; j++) {
184             xw[0] = x[vi[j]];
185             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
186           }
187           t[i2] = s[0];
188           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
189           x[i2]  = xw[0];
190           idiag += 1;
191           i2    += 1;
192         }
193         break;
194       case 2:
195         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
196         t[0] = b[0]; t[1] = b[1];
197         i2     = 2;
198         idiag += 4;
199         for (i=1; i<m; i++) {
200           v  = aa + 4*ai[i];
201           vi = aj + ai[i];
202           nz = diag[i] - ai[i];
203           s[0] = b[i2]; s[1] = b[i2+1];
204           for (j=0; j<nz; j++) {
205             idx = 2*vi[j];
206             it  = 4*j;
207             xw[0] = x[idx]; xw[1] = x[1+idx];
208             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
209           }
210           t[i2] = s[0]; t[i2+1] = s[1];
211           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
212           x[i2]   = xw[0]; x[i2+1] = xw[1];
213           idiag  += 4;
214           i2     += 2;
215         }
216         break;
217       case 3:
218         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
219         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
220         i2     = 3;
221         idiag += 9;
222         for (i=1; i<m; i++) {
223           v  = aa + 9*ai[i];
224           vi = aj + ai[i];
225           nz = diag[i] - ai[i];
226           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
227           while (nz--) {
228             idx = 3*(*vi++);
229             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
230             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
231             v  += 9;
232           }
233           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
234           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
235           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
236           idiag  += 9;
237           i2     += 3;
238         }
239         break;
240       case 4:
241         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
242         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
243         i2     = 4;
244         idiag += 16;
245         for (i=1; i<m; i++) {
246           v  = aa + 16*ai[i];
247           vi = aj + ai[i];
248           nz = diag[i] - ai[i];
249           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
250           while (nz--) {
251             idx = 4*(*vi++);
252             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
253             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
254             v  += 16;
255           }
256           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
257           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
258           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
259           idiag  += 16;
260           i2     += 4;
261         }
262         break;
263       case 5:
264         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
265         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
266         i2     = 5;
267         idiag += 25;
268         for (i=1; i<m; i++) {
269           v  = aa + 25*ai[i];
270           vi = aj + ai[i];
271           nz = diag[i] - ai[i];
272           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
273           while (nz--) {
274             idx = 5*(*vi++);
275             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
276             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
277             v  += 25;
278           }
279           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
280           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
281           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
282           idiag  += 25;
283           i2     += 5;
284         }
285         break;
286       case 6:
287         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
288         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
289         i2     = 6;
290         idiag += 36;
291         for (i=1; i<m; i++) {
292           v  = aa + 36*ai[i];
293           vi = aj + ai[i];
294           nz = diag[i] - ai[i];
295           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
296           while (nz--) {
297             idx = 6*(*vi++);
298             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
299             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
300             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
301             v  += 36;
302           }
303           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
304           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
305           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
306           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
307           idiag  += 36;
308           i2     += 6;
309         }
310         break;
311       case 7:
312         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
313         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
314         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
315         i2     = 7;
316         idiag += 49;
317         for (i=1; i<m; i++) {
318           v  = aa + 49*ai[i];
319           vi = aj + ai[i];
320           nz = diag[i] - ai[i];
321           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
322           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
323           while (nz--) {
324             idx = 7*(*vi++);
325             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
326             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
327             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
328             v  += 49;
329           }
330           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
331           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
332           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
333           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
334           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
335           idiag  += 49;
336           i2     += 7;
337         }
338         break;
339       default:
340         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
341         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
342         i2     = bs;
343         idiag += bs2;
344         for (i=1; i<m; i++) {
345           v  = aa + bs2*ai[i];
346           vi = aj + ai[i];
347           nz = diag[i] - ai[i];
348 
349           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
350           /* copy all rows of x that are needed into contiguous space */
351           workt = work;
352           for (j=0; j<nz; j++) {
353             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
354             workt += bs;
355           }
356           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
357           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
358           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
359 
360           idiag += bs2;
361           i2    += bs;
362         }
363         break;
364       }
365       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
366       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
367       xb = t;
368     }
369     else xb = b;
370     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
371       idiag = a->idiag+bs2*(a->mbs-1);
372       i2 = bs * (m-1);
373       switch (bs) {
374       case 1:
375         s[0]  = xb[i2];
376         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
377         x[i2] = xw[0];
378         i2   -= 1;
379         for (i=m-2; i>=0; i--) {
380           v  = aa + (diag[i]+1);
381           vi = aj + diag[i] + 1;
382           nz = ai[i+1] - diag[i] - 1;
383           s[0] = xb[i2];
384           for (j=0; j<nz; j++) {
385             xw[0] = x[vi[j]];
386             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
387           }
388           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
389           x[i2]  = xw[0];
390           idiag -= 1;
391           i2    -= 1;
392         }
393         break;
394       case 2:
395         s[0]  = xb[i2]; s[1] = xb[i2+1];
396         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
397         x[i2] = xw[0]; x[i2+1] = xw[1];
398         i2    -= 2;
399         idiag -= 4;
400         for (i=m-2; i>=0; i--) {
401           v  = aa + 4*(diag[i] + 1);
402           vi = aj + diag[i] + 1;
403           nz = ai[i+1] - diag[i] - 1;
404           s[0] = xb[i2]; s[1] = xb[i2+1];
405           for (j=0; j<nz; j++) {
406             idx = 2*vi[j];
407             it  = 4*j;
408             xw[0] = x[idx]; xw[1] = x[1+idx];
409             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
410           }
411           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
412           x[i2]   = xw[0]; x[i2+1] = xw[1];
413           idiag  -= 4;
414           i2     -= 2;
415         }
416         break;
417       case 3:
418         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
419         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
420         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
421         i2    -= 3;
422         idiag -= 9;
423         for (i=m-2; i>=0; i--) {
424           v  = aa + 9*(diag[i]+1);
425           vi = aj + diag[i] + 1;
426           nz = ai[i+1] - diag[i] - 1;
427           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
428           while (nz--) {
429             idx = 3*(*vi++);
430             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
431             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
432             v  += 9;
433           }
434           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
435           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
436           idiag  -= 9;
437           i2     -= 3;
438         }
439         break;
440       case 4:
441         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
442         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
443         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
444         i2    -= 4;
445         idiag -= 16;
446         for (i=m-2; i>=0; i--) {
447           v  = aa + 16*(diag[i]+1);
448           vi = aj + diag[i] + 1;
449           nz = ai[i+1] - diag[i] - 1;
450           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
451           while (nz--) {
452             idx = 4*(*vi++);
453             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
454             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
455             v  += 16;
456           }
457           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
458           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
459           idiag  -= 16;
460           i2     -= 4;
461         }
462         break;
463       case 5:
464         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
465         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
466         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
467         i2    -= 5;
468         idiag -= 25;
469         for (i=m-2; i>=0; i--) {
470           v  = aa + 25*(diag[i]+1);
471           vi = aj + diag[i] + 1;
472           nz = ai[i+1] - diag[i] - 1;
473           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
474           while (nz--) {
475             idx = 5*(*vi++);
476             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
477             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
478             v  += 25;
479           }
480           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
481           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
482           idiag  -= 25;
483           i2     -= 5;
484         }
485         break;
486       case 6:
487         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
488         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
489         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
490         i2    -= 6;
491         idiag -= 36;
492         for (i=m-2; i>=0; i--) {
493           v  = aa + 36*(diag[i]+1);
494           vi = aj + diag[i] + 1;
495           nz = ai[i+1] - diag[i] - 1;
496           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
497           while (nz--) {
498             idx = 6*(*vi++);
499             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
500             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
501             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
502             v  += 36;
503           }
504           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
505           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
506           idiag  -= 36;
507           i2     -= 6;
508         }
509         break;
510       case 7:
511         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
512         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
513         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
514         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
515         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
516         i2    -= 7;
517         idiag -= 49;
518         for (i=m-2; i>=0; i--) {
519           v  = aa + 49*(diag[i]+1);
520           vi = aj + diag[i] + 1;
521           nz = ai[i+1] - diag[i] - 1;
522           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
523           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
524           while (nz--) {
525             idx = 7*(*vi++);
526             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
527             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
528             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
529             v  += 49;
530           }
531           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
532           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
533           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
534           idiag  -= 49;
535           i2     -= 7;
536         }
537         break;
538       default:
539         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
540         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
541         i2    -= bs;
542         idiag -= bs2;
543         for (i=m-2; i>=0; i--) {
544           v  = aa + bs2*(diag[i]+1);
545           vi = aj + diag[i] + 1;
546           nz = ai[i+1] - diag[i] - 1;
547 
548           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
549           /* copy all rows of x that are needed into contiguous space */
550           workt = work;
551           for (j=0; j<nz; j++) {
552             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
553             workt += bs;
554           }
555           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
556           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
557 
558           idiag -= bs2;
559           i2    -= bs;
560         }
561         break;
562       }
563       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
564     }
565     its--;
566   }
567   while (its--) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       idiag = a->idiag;
570       i2 = 0;
571       switch (bs) {
572       case 1:
573         for (i=0; i<m; i++) {
574           v  = aa + ai[i];
575           vi = aj + ai[i];
576           nz = ai[i+1] - ai[i];
577           s[0] = b[i2];
578           for (j=0; j<nz; j++) {
579             xw[0] = x[vi[j]];
580             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
581           }
582           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
583           x[i2] += xw[0];
584           idiag += 1;
585           i2    += 1;
586         }
587         break;
588       case 2:
589         for (i=0; i<m; i++) {
590           v  = aa + 4*ai[i];
591           vi = aj + ai[i];
592           nz = ai[i+1] - ai[i];
593           s[0] = b[i2]; s[1] = b[i2+1];
594           for (j=0; j<nz; j++) {
595             idx = 2*vi[j];
596             it  = 4*j;
597             xw[0] = x[idx]; xw[1] = x[1+idx];
598             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
599           }
600           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
601           x[i2]  += xw[0]; x[i2+1] += xw[1];
602           idiag  += 4;
603           i2     += 2;
604         }
605         break;
606       case 3:
607         for (i=0; i<m; i++) {
608           v  = aa + 9*ai[i];
609           vi = aj + ai[i];
610           nz = ai[i+1] - ai[i];
611           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
612           while (nz--) {
613             idx = 3*(*vi++);
614             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
615             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
616             v  += 9;
617           }
618           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
619           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
620           idiag  += 9;
621           i2     += 3;
622         }
623         break;
624       case 4:
625         for (i=0; i<m; i++) {
626           v  = aa + 16*ai[i];
627           vi = aj + ai[i];
628           nz = ai[i+1] - ai[i];
629           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
630           while (nz--) {
631             idx = 4*(*vi++);
632             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
633             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
634             v  += 16;
635           }
636           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
637           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
638           idiag  += 16;
639           i2     += 4;
640         }
641         break;
642       case 5:
643         for (i=0; i<m; i++) {
644           v  = aa + 25*ai[i];
645           vi = aj + ai[i];
646           nz = ai[i+1] - ai[i];
647           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
648           while (nz--) {
649             idx = 5*(*vi++);
650             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
651             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
652             v  += 25;
653           }
654           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
655           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
656           idiag  += 25;
657           i2     += 5;
658         }
659         break;
660       case 6:
661         for (i=0; i<m; i++) {
662           v  = aa + 36*ai[i];
663           vi = aj + ai[i];
664           nz = ai[i+1] - ai[i];
665           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
666           while (nz--) {
667             idx = 6*(*vi++);
668             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
669             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
670             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
671             v  += 36;
672           }
673           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
674           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
675           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
676           idiag  += 36;
677           i2     += 6;
678         }
679         break;
680       case 7:
681         for (i=0; i<m; i++) {
682           v  = aa + 49*ai[i];
683           vi = aj + ai[i];
684           nz = ai[i+1] - ai[i];
685           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
686           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
687           while (nz--) {
688             idx = 7*(*vi++);
689             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
690             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
691             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
692             v  += 49;
693           }
694           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
695           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
696           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
697           idiag  += 49;
698           i2     += 7;
699         }
700         break;
701       default:
702         for (i=0; i<m; i++) {
703           v  = aa + bs2*ai[i];
704           vi = aj + ai[i];
705           nz = ai[i+1] - ai[i];
706 
707           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
708           /* copy all rows of x that are needed into contiguous space */
709           workt = work;
710           for (j=0; j<nz; j++) {
711             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
712             workt += bs;
713           }
714           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
715           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
716 
717           idiag += bs2;
718           i2    += bs;
719         }
720         break;
721       }
722       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
723     }
724     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
725       idiag = a->idiag+bs2*(a->mbs-1);
726       i2 = bs * (m-1);
727       switch (bs) {
728       case 1:
729         for (i=m-1; i>=0; i--) {
730           v  = aa + ai[i];
731           vi = aj + ai[i];
732           nz = ai[i+1] - ai[i];
733           s[0] = b[i2];
734           for (j=0; j<nz; j++) {
735             xw[0] = x[vi[j]];
736             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
737           }
738           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
739           x[i2] += xw[0];
740           idiag -= 1;
741           i2    -= 1;
742         }
743         break;
744       case 2:
745         for (i=m-1; i>=0; i--) {
746           v  = aa + 4*ai[i];
747           vi = aj + ai[i];
748           nz = ai[i+1] - ai[i];
749           s[0] = b[i2]; s[1] = b[i2+1];
750           for (j=0; j<nz; j++) {
751             idx = 2*vi[j];
752             it  = 4*j;
753             xw[0] = x[idx]; xw[1] = x[1+idx];
754             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
755           }
756           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
757           x[i2]  += xw[0]; x[i2+1] += xw[1];
758           idiag  -= 4;
759           i2     -= 2;
760         }
761         break;
762       case 3:
763         for (i=m-1; i>=0; i--) {
764           v  = aa + 9*ai[i];
765           vi = aj + ai[i];
766           nz = ai[i+1] - ai[i];
767           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
768           while (nz--) {
769             idx = 3*(*vi++);
770             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
771             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
772             v  += 9;
773           }
774           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
775           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
776           idiag  -= 9;
777           i2     -= 3;
778         }
779         break;
780       case 4:
781         for (i=m-1; i>=0; i--) {
782           v  = aa + 16*ai[i];
783           vi = aj + ai[i];
784           nz = ai[i+1] - ai[i];
785           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
786           while (nz--) {
787             idx = 4*(*vi++);
788             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
789             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
790             v  += 16;
791           }
792           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
793           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
794           idiag  -= 16;
795           i2     -= 4;
796         }
797         break;
798       case 5:
799         for (i=m-1; i>=0; i--) {
800           v  = aa + 25*ai[i];
801           vi = aj + ai[i];
802           nz = ai[i+1] - ai[i];
803           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
804           while (nz--) {
805             idx = 5*(*vi++);
806             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
807             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
808             v  += 25;
809           }
810           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
811           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
812           idiag  -= 25;
813           i2     -= 5;
814         }
815         break;
816       case 6:
817         for (i=m-1; i>=0; i--) {
818           v  = aa + 36*ai[i];
819           vi = aj + ai[i];
820           nz = ai[i+1] - ai[i];
821           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
822           while (nz--) {
823             idx = 6*(*vi++);
824             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
825             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
826             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
827             v  += 36;
828           }
829           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
830           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
831           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
832           idiag  -= 36;
833           i2     -= 6;
834         }
835         break;
836       case 7:
837         for (i=m-1; i>=0; i--) {
838           v  = aa + 49*ai[i];
839           vi = aj + ai[i];
840           nz = ai[i+1] - ai[i];
841           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
842           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
843           while (nz--) {
844             idx = 7*(*vi++);
845             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
846             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
847             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
848             v  += 49;
849           }
850           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
851           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
852           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
853           idiag  -= 49;
854           i2     -= 7;
855         }
856         break;
857       default:
858         for (i=m-1; i>=0; i--) {
859           v  = aa + bs2*ai[i];
860           vi = aj + ai[i];
861           nz = ai[i+1] - ai[i];
862 
863           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           /* copy all rows of x that are needed into contiguous space */
865           workt = work;
866           for (j=0; j<nz; j++) {
867             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
868             workt += bs;
869           }
870           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
871           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
872 
873           idiag -= bs2;
874           i2    -= bs;
875         }
876         break;
877       }
878       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
879     }
880   }
881   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
882   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
883   PetscFunctionReturn(0);
884 }
885 
886 
887 /*
888     Special version for direct calls from Fortran (Used in PETSc-fun3d)
889 */
890 #if defined(PETSC_HAVE_FORTRAN_CAPS)
891 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
892 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
893 #define matsetvaluesblocked4_ matsetvaluesblocked4
894 #endif
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "matsetvaluesblocked4_"
898 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
899 {
900   Mat               A  = *AA;
901   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
902   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
903   PetscInt          *ai    =a->i,*ailen=a->ilen;
904   PetscInt          *aj    =a->j,stepval,lastcol = -1;
905   const PetscScalar *value = v;
906   MatScalar         *ap,*aa = a->a,*bap;
907 
908   PetscFunctionBegin;
909   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
910   stepval = (n-1)*4;
911   for (k=0; k<m; k++) { /* loop over added rows */
912     row  = im[k];
913     rp   = aj + ai[row];
914     ap   = aa + 16*ai[row];
915     nrow = ailen[row];
916     low  = 0;
917     high = nrow;
918     for (l=0; l<n; l++) { /* loop over added columns */
919       col = in[l];
920       if (col <= lastcol)  low = 0;
921       else                high = nrow;
922       lastcol = col;
923       value   = v + k*(stepval+4 + l)*4;
924       while (high-low > 7) {
925         t = (low+high)/2;
926         if (rp[t] > col) high = t;
927         else             low  = t;
928       }
929       for (i=low; i<high; i++) {
930         if (rp[i] > col) break;
931         if (rp[i] == col) {
932           bap = ap +  16*i;
933           for (ii=0; ii<4; ii++,value+=stepval) {
934             for (jj=ii; jj<16; jj+=4) {
935               bap[jj] += *value++;
936             }
937           }
938           goto noinsert2;
939         }
940       }
941       N = nrow++ - 1;
942       high++; /* added new column index thus must search to one higher than before */
943       /* shift up all the later entries in this row */
944       for (ii=N; ii>=i; ii--) {
945         rp[ii+1] = rp[ii];
946         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
947       }
948       if (N >= i) {
949         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
950       }
951       rp[i] = col;
952       bap   = ap +  16*i;
953       for (ii=0; ii<4; ii++,value+=stepval) {
954         for (jj=ii; jj<16; jj+=4) {
955           bap[jj] = *value++;
956         }
957       }
958       noinsert2:;
959       low = i;
960     }
961     ailen[row] = nrow;
962   }
963   PetscFunctionReturnVoid();
964 }
965 
966 #if defined(PETSC_HAVE_FORTRAN_CAPS)
967 #define matsetvalues4_ MATSETVALUES4
968 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
969 #define matsetvalues4_ matsetvalues4
970 #endif
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatSetValues4_"
974 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
975 {
976   Mat         A  = *AA;
977   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
978   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
979   PetscInt    *ai=a->i,*ailen=a->ilen;
980   PetscInt    *aj=a->j,brow,bcol;
981   PetscInt    ridx,cidx,lastcol = -1;
982   MatScalar   *ap,value,*aa=a->a,*bap;
983 
984   PetscFunctionBegin;
985   for (k=0; k<m; k++) { /* loop over added rows */
986     row  = im[k]; brow = row/4;
987     rp   = aj + ai[brow];
988     ap   = aa + 16*ai[brow];
989     nrow = ailen[brow];
990     low  = 0;
991     high = nrow;
992     for (l=0; l<n; l++) { /* loop over added columns */
993       col   = in[l]; bcol = col/4;
994       ridx  = row % 4; cidx = col % 4;
995       value = v[l + k*n];
996       if (col <= lastcol)  low = 0;
997       else                high = nrow;
998       lastcol = col;
999       while (high-low > 7) {
1000         t = (low+high)/2;
1001         if (rp[t] > bcol) high = t;
1002         else              low  = t;
1003       }
1004       for (i=low; i<high; i++) {
1005         if (rp[i] > bcol) break;
1006         if (rp[i] == bcol) {
1007           bap   = ap +  16*i + 4*cidx + ridx;
1008           *bap += value;
1009           goto noinsert1;
1010         }
1011       }
1012       N = nrow++ - 1;
1013       high++; /* added new column thus must search to one higher than before */
1014       /* shift up all the later entries in this row */
1015       for (ii=N; ii>=i; ii--) {
1016         rp[ii+1] = rp[ii];
1017         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1018       }
1019       if (N>=i) {
1020         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1021       }
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1037 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1038 {
1039   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041   PetscInt       *diag,*ii = a->i,i;
1042 
1043   PetscFunctionBegin;
1044   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1045   *missing = PETSC_FALSE;
1046   if (A->rmap->n > 0 && !ii) {
1047     *missing = PETSC_TRUE;
1048     if (d) *d = 0;
1049     PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");
1050   } else {
1051     diag = a->diag;
1052     for (i=0; i<a->mbs; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1066 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1067 {
1068   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070   PetscInt       i,j,m = a->mbs;
1071 
1072   PetscFunctionBegin;
1073   if (!a->diag) {
1074     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1075     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1076     a->free_diag = PETSC_TRUE;
1077   }
1078   for (i=0; i<m; i++) {
1079     a->diag[i] = a->i[i+1];
1080     for (j=a->i[i]; j<a->i[i+1]; j++) {
1081       if (a->j[j] == i) {
1082         a->diag[i] = j;
1083         break;
1084       }
1085     }
1086   }
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1093 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1094 {
1095   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1096   PetscErrorCode ierr;
1097   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1098   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1099 
1100   PetscFunctionBegin;
1101   *nn = n;
1102   if (!ia) PetscFunctionReturn(0);
1103   if (symmetric) {
1104     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1105     nz   = tia[n];
1106   } else {
1107     tia = a->i; tja = a->j;
1108   }
1109 
1110   if (!blockcompressed && bs > 1) {
1111     (*nn) *= bs;
1112     /* malloc & create the natural set of indices */
1113     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1114     if (n) {
1115       (*ia)[0] = 0;
1116       for (j=1; j<bs; j++) {
1117         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1118       }
1119     }
1120 
1121     for (i=1; i<n; i++) {
1122       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1123       for (j=1; j<bs; j++) {
1124         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1125       }
1126     }
1127     if (n) {
1128       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1129     }
1130 
1131     if (inja) {
1132       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1133       cnt = 0;
1134       for (i=0; i<n; i++) {
1135         for (j=0; j<bs; j++) {
1136           for (k=tia[i]; k<tia[i+1]; k++) {
1137             for (l=0; l<bs; l++) {
1138               (*ja)[cnt++] = bs*tja[k] + l;
1139             }
1140           }
1141         }
1142       }
1143     }
1144 
1145     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1146       ierr = PetscFree(tia);CHKERRQ(ierr);
1147       ierr = PetscFree(tja);CHKERRQ(ierr);
1148     }
1149   } else if (oshift == 1) {
1150     if (symmetric) {
1151       nz = tia[A->rmap->n/bs];
1152       /*  add 1 to i and j indices */
1153       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1154       *ia = tia;
1155       if (ja) {
1156         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1157         *ja = tja;
1158       }
1159     } else {
1160       nz = a->i[A->rmap->n/bs];
1161       /* malloc space and  add 1 to i and j indices */
1162       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1163       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1164       if (ja) {
1165         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1166         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1167       }
1168     }
1169   } else {
1170     *ia = tia;
1171     if (ja) *ja = tja;
1172   }
1173   PetscFunctionReturn(0);
1174 }
1175 
1176 #undef __FUNCT__
1177 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1178 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1179 {
1180   PetscErrorCode ierr;
1181 
1182   PetscFunctionBegin;
1183   if (!ia) PetscFunctionReturn(0);
1184   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1185     ierr = PetscFree(*ia);CHKERRQ(ierr);
1186     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1187   }
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1193 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1194 {
1195   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199 #if defined(PETSC_USE_LOG)
1200   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1201 #endif
1202   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1205   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1206   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1207   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1208   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1209   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1210   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1211   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1212   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1213   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1214   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1215 
1216   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1217   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1218   ierr = PetscFree(A->data);CHKERRQ(ierr);
1219 
1220   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1236 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1237 {
1238   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   switch (op) {
1243   case MAT_ROW_ORIENTED:
1244     a->roworiented = flg;
1245     break;
1246   case MAT_KEEP_NONZERO_PATTERN:
1247     a->keepnonzeropattern = flg;
1248     break;
1249   case MAT_NEW_NONZERO_LOCATIONS:
1250     a->nonew = (flg ? 0 : 1);
1251     break;
1252   case MAT_NEW_NONZERO_LOCATION_ERR:
1253     a->nonew = (flg ? -1 : 0);
1254     break;
1255   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1256     a->nonew = (flg ? -2 : 0);
1257     break;
1258   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1259     a->nounused = (flg ? -1 : 0);
1260     break;
1261   case MAT_NEW_DIAGONALS:
1262   case MAT_IGNORE_OFF_PROC_ENTRIES:
1263   case MAT_USE_HASH_TABLE:
1264     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1265     break;
1266   case MAT_SPD:
1267   case MAT_SYMMETRIC:
1268   case MAT_STRUCTURALLY_SYMMETRIC:
1269   case MAT_HERMITIAN:
1270   case MAT_SYMMETRY_ETERNAL:
1271     /* These options are handled directly by MatSetOption() */
1272     break;
1273   default:
1274     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1275   }
1276   PetscFunctionReturn(0);
1277 }
1278 
1279 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatGetRow_SeqBAIJ_private"
1282 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1283 {
1284   PetscErrorCode ierr;
1285   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1286   MatScalar      *aa_i;
1287   PetscScalar    *v_i;
1288 
1289   PetscFunctionBegin;
1290   bs  = A->rmap->bs;
1291   bs2 = bs*bs;
1292   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1293 
1294   bn  = row/bs;   /* Block number */
1295   bp  = row % bs; /* Block Position */
1296   M   = ai[bn+1] - ai[bn];
1297   *nz = bs*M;
1298 
1299   if (v) {
1300     *v = 0;
1301     if (*nz) {
1302       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1303       for (i=0; i<M; i++) { /* for each block in the block row */
1304         v_i  = *v + i*bs;
1305         aa_i = aa + bs2*(ai[bn] + i);
1306         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1307       }
1308     }
1309   }
1310 
1311   if (idx) {
1312     *idx = 0;
1313     if (*nz) {
1314       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1315       for (i=0; i<M; i++) { /* for each block in the block row */
1316         idx_i = *idx + i*bs;
1317         itmp  = bs*aj[ai[bn] + i];
1318         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1319       }
1320     }
1321   }
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1327 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1328 {
1329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1334   PetscFunctionReturn(0);
1335 }
1336 
1337 #undef __FUNCT__
1338 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1339 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1340 {
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1345   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1353 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1354 {
1355   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1356   Mat            C;
1357   PetscErrorCode ierr;
1358   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1359   PetscInt       *rows,*cols,bs2=a->bs2;
1360   MatScalar      *array;
1361 
1362   PetscFunctionBegin;
1363   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1364   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1365     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1366 
1367     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1368     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1369     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1370     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1371     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1372     ierr = PetscFree(col);CHKERRQ(ierr);
1373   } else {
1374     C = *B;
1375   }
1376 
1377   array = a->a;
1378   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1379   for (i=0; i<mbs; i++) {
1380     cols[0] = i*bs;
1381     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1382     len = ai[i+1] - ai[i];
1383     for (j=0; j<len; j++) {
1384       rows[0] = (*aj++)*bs;
1385       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1386       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1387       array += bs2;
1388     }
1389   }
1390   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1391 
1392   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1405 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1406 {
1407   PetscErrorCode ierr;
1408   Mat            Btrans;
1409 
1410   PetscFunctionBegin;
1411   *f   = PETSC_FALSE;
1412   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1413   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1414   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #undef __FUNCT__
1419 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1420 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1421 {
1422   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1423   PetscErrorCode ierr;
1424   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1425   int            fd;
1426   PetscScalar    *aa;
1427   FILE           *file;
1428 
1429   PetscFunctionBegin;
1430   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1431   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1432   col_lens[0] = MAT_FILE_CLASSID;
1433 
1434   col_lens[1] = A->rmap->N;
1435   col_lens[2] = A->cmap->n;
1436   col_lens[3] = a->nz*bs2;
1437 
1438   /* store lengths of each row and write (including header) to file */
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1443     }
1444   }
1445   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1446   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1447 
1448   /* store column indices (zero start index) */
1449   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1450   count = 0;
1451   for (i=0; i<a->mbs; i++) {
1452     for (j=0; j<bs; j++) {
1453       for (k=a->i[i]; k<a->i[i+1]; k++) {
1454         for (l=0; l<bs; l++) {
1455           jj[count++] = bs*a->j[k] + l;
1456         }
1457       }
1458     }
1459   }
1460   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1461   ierr = PetscFree(jj);CHKERRQ(ierr);
1462 
1463   /* store nonzero values */
1464   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1465   count = 0;
1466   for (i=0; i<a->mbs; i++) {
1467     for (j=0; j<bs; j++) {
1468       for (k=a->i[i]; k<a->i[i+1]; k++) {
1469         for (l=0; l<bs; l++) {
1470           aa[count++] = a->a[bs2*k + l*bs + j];
1471         }
1472       }
1473     }
1474   }
1475   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1476   ierr = PetscFree(aa);CHKERRQ(ierr);
1477 
1478   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1479   if (file) {
1480     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1481   }
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 #undef __FUNCT__
1486 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1487 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1488 {
1489   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1490   PetscErrorCode    ierr;
1491   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1492   PetscViewerFormat format;
1493 
1494   PetscFunctionBegin;
1495   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1496   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1497     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1498   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1499     const char *matname;
1500     Mat        aij;
1501     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1502     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1503     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1504     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1505     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1506   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1507       PetscFunctionReturn(0);
1508   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1509     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1510     for (i=0; i<a->mbs; i++) {
1511       for (j=0; j<bs; j++) {
1512         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1513         for (k=a->i[i]; k<a->i[i+1]; k++) {
1514           for (l=0; l<bs; l++) {
1515 #if defined(PETSC_USE_COMPLEX)
1516             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1517               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1518                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1519             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1520               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1521                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1522             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1523               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1524             }
1525 #else
1526             if (a->a[bs2*k + l*bs + j] != 0.0) {
1527               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1528             }
1529 #endif
1530           }
1531         }
1532         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1533       }
1534     }
1535     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1536   } else {
1537     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1538     for (i=0; i<a->mbs; i++) {
1539       for (j=0; j<bs; j++) {
1540         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1541         for (k=a->i[i]; k<a->i[i+1]; k++) {
1542           for (l=0; l<bs; l++) {
1543 #if defined(PETSC_USE_COMPLEX)
1544             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1546                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1547             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1549                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1550             } else {
1551               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1552             }
1553 #else
1554             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1555 #endif
1556           }
1557         }
1558         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1559       }
1560     }
1561     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1562   }
1563   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1564   PetscFunctionReturn(0);
1565 }
1566 
1567 #include <petscdraw.h>
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1570 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1571 {
1572   Mat               A = (Mat) Aa;
1573   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1574   PetscErrorCode    ierr;
1575   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1576   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1577   MatScalar         *aa;
1578   PetscViewer       viewer;
1579   PetscViewerFormat format;
1580 
1581   PetscFunctionBegin;
1582   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1583   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1584 
1585   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1586 
1587   /* loop over matrix elements drawing boxes */
1588 
1589   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1590     color = PETSC_DRAW_BLUE;
1591     for (i=0,row=0; i<mbs; i++,row+=bs) {
1592       for (j=a->i[i]; j<a->i[i+1]; j++) {
1593         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1594         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1595         aa  = a->a + j*bs2;
1596         for (k=0; k<bs; k++) {
1597           for (l=0; l<bs; l++) {
1598             if (PetscRealPart(*aa++) >=  0.) continue;
1599             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1600           }
1601         }
1602       }
1603     }
1604     color = PETSC_DRAW_CYAN;
1605     for (i=0,row=0; i<mbs; i++,row+=bs) {
1606       for (j=a->i[i]; j<a->i[i+1]; j++) {
1607         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1608         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1609         aa  = a->a + j*bs2;
1610         for (k=0; k<bs; k++) {
1611           for (l=0; l<bs; l++) {
1612             if (PetscRealPart(*aa++) != 0.) continue;
1613             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1614           }
1615         }
1616       }
1617     }
1618     color = PETSC_DRAW_RED;
1619     for (i=0,row=0; i<mbs; i++,row+=bs) {
1620       for (j=a->i[i]; j<a->i[i+1]; j++) {
1621         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1622         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1623         aa  = a->a + j*bs2;
1624         for (k=0; k<bs; k++) {
1625           for (l=0; l<bs; l++) {
1626             if (PetscRealPart(*aa++) <= 0.) continue;
1627             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1628           }
1629         }
1630       }
1631     }
1632   } else {
1633     /* use contour shading to indicate magnitude of values */
1634     /* first determine max of all nonzero values */
1635     PetscDraw popup;
1636     PetscReal scale,maxv = 0.0;
1637 
1638     for (i=0; i<a->nz*a->bs2; i++) {
1639       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1640     }
1641     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1642     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1643     if (popup) {
1644       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1645     }
1646     for (i=0,row=0; i<mbs; i++,row+=bs) {
1647       for (j=a->i[i]; j<a->i[i+1]; j++) {
1648         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1649         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1650         aa  = a->a + j*bs2;
1651         for (k=0; k<bs; k++) {
1652           for (l=0; l<bs; l++) {
1653             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1654             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1655           }
1656         }
1657       }
1658     }
1659   }
1660   PetscFunctionReturn(0);
1661 }
1662 
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1665 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1666 {
1667   PetscErrorCode ierr;
1668   PetscReal      xl,yl,xr,yr,w,h;
1669   PetscDraw      draw;
1670   PetscBool      isnull;
1671 
1672   PetscFunctionBegin;
1673   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1674   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1675 
1676   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1677   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1678   xr  += w;    yr += h;  xl = -w;     yl = -h;
1679   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1680   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1681   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1682   PetscFunctionReturn(0);
1683 }
1684 
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatView_SeqBAIJ"
1687 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1688 {
1689   PetscErrorCode ierr;
1690   PetscBool      iascii,isbinary,isdraw;
1691 
1692   PetscFunctionBegin;
1693   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1694   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1695   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1696   if (iascii) {
1697     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1698   } else if (isbinary) {
1699     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1700   } else if (isdraw) {
1701     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1702   } else {
1703     Mat B;
1704     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1705     ierr = MatView(B,viewer);CHKERRQ(ierr);
1706     ierr = MatDestroy(&B);CHKERRQ(ierr);
1707   }
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 
1712 #undef __FUNCT__
1713 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1714 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1715 {
1716   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1717   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1718   PetscInt    *ai = a->i,*ailen = a->ilen;
1719   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1720   MatScalar   *ap,*aa = a->a;
1721 
1722   PetscFunctionBegin;
1723   for (k=0; k<m; k++) { /* loop over rows */
1724     row = im[k]; brow = row/bs;
1725     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1726     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1727     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1728     nrow = ailen[brow];
1729     for (l=0; l<n; l++) { /* loop over columns */
1730       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1731       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1732       col  = in[l];
1733       bcol = col/bs;
1734       cidx = col%bs;
1735       ridx = row%bs;
1736       high = nrow;
1737       low  = 0; /* assume unsorted */
1738       while (high-low > 5) {
1739         t = (low+high)/2;
1740         if (rp[t] > bcol) high = t;
1741         else             low  = t;
1742       }
1743       for (i=low; i<high; i++) {
1744         if (rp[i] > bcol) break;
1745         if (rp[i] == bcol) {
1746           *v++ = ap[bs2*i+bs*cidx+ridx];
1747           goto finished;
1748         }
1749       }
1750       *v++ = 0.0;
1751 finished:;
1752     }
1753   }
1754   PetscFunctionReturn(0);
1755 }
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1759 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1760 {
1761   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1762   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1763   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1764   PetscErrorCode    ierr;
1765   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1766   PetscBool         roworiented=a->roworiented;
1767   const PetscScalar *value     = v;
1768   MatScalar         *ap,*aa = a->a,*bap;
1769 
1770   PetscFunctionBegin;
1771   if (roworiented) {
1772     stepval = (n-1)*bs;
1773   } else {
1774     stepval = (m-1)*bs;
1775   }
1776   for (k=0; k<m; k++) { /* loop over added rows */
1777     row = im[k];
1778     if (row < 0) continue;
1779 #if defined(PETSC_USE_DEBUG)
1780     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1781 #endif
1782     rp   = aj + ai[row];
1783     ap   = aa + bs2*ai[row];
1784     rmax = imax[row];
1785     nrow = ailen[row];
1786     low  = 0;
1787     high = nrow;
1788     for (l=0; l<n; l++) { /* loop over added columns */
1789       if (in[l] < 0) continue;
1790 #if defined(PETSC_USE_DEBUG)
1791       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1792 #endif
1793       col = in[l];
1794       if (roworiented) {
1795         value = v + (k*(stepval+bs) + l)*bs;
1796       } else {
1797         value = v + (l*(stepval+bs) + k)*bs;
1798       }
1799       if (col <= lastcol) low = 0;
1800       else high = nrow;
1801       lastcol = col;
1802       while (high-low > 7) {
1803         t = (low+high)/2;
1804         if (rp[t] > col) high = t;
1805         else             low  = t;
1806       }
1807       for (i=low; i<high; i++) {
1808         if (rp[i] > col) break;
1809         if (rp[i] == col) {
1810           bap = ap +  bs2*i;
1811           if (roworiented) {
1812             if (is == ADD_VALUES) {
1813               for (ii=0; ii<bs; ii++,value+=stepval) {
1814                 for (jj=ii; jj<bs2; jj+=bs) {
1815                   bap[jj] += *value++;
1816                 }
1817               }
1818             } else {
1819               for (ii=0; ii<bs; ii++,value+=stepval) {
1820                 for (jj=ii; jj<bs2; jj+=bs) {
1821                   bap[jj] = *value++;
1822                 }
1823               }
1824             }
1825           } else {
1826             if (is == ADD_VALUES) {
1827               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1828                 for (jj=0; jj<bs; jj++) {
1829                   bap[jj] += value[jj];
1830                 }
1831                 bap += bs;
1832               }
1833             } else {
1834               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1835                 for (jj=0; jj<bs; jj++) {
1836                   bap[jj]  = value[jj];
1837                 }
1838                 bap += bs;
1839               }
1840             }
1841           }
1842           goto noinsert2;
1843         }
1844       }
1845       if (nonew == 1) goto noinsert2;
1846       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1847       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1848       N = nrow++ - 1; high++;
1849       /* shift up all the later entries in this row */
1850       for (ii=N; ii>=i; ii--) {
1851         rp[ii+1] = rp[ii];
1852         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1853       }
1854       if (N >= i) {
1855         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1856       }
1857       rp[i] = col;
1858       bap   = ap +  bs2*i;
1859       if (roworiented) {
1860         for (ii=0; ii<bs; ii++,value+=stepval) {
1861           for (jj=ii; jj<bs2; jj+=bs) {
1862             bap[jj] = *value++;
1863           }
1864         }
1865       } else {
1866         for (ii=0; ii<bs; ii++,value+=stepval) {
1867           for (jj=0; jj<bs; jj++) {
1868             *bap++ = *value++;
1869           }
1870         }
1871       }
1872 noinsert2:;
1873       low = i;
1874     }
1875     ailen[row] = nrow;
1876   }
1877   PetscFunctionReturn(0);
1878 }
1879 
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1882 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1883 {
1884   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1885   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1886   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1887   PetscErrorCode ierr;
1888   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1889   MatScalar      *aa  = a->a,*ap;
1890   PetscReal      ratio=0.6;
1891 
1892   PetscFunctionBegin;
1893   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1894 
1895   if (m) rmax = ailen[0];
1896   for (i=1; i<mbs; i++) {
1897     /* move each row back by the amount of empty slots (fshift) before it*/
1898     fshift += imax[i-1] - ailen[i-1];
1899     rmax    = PetscMax(rmax,ailen[i]);
1900     if (fshift) {
1901       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1902       N  = ailen[i];
1903       for (j=0; j<N; j++) {
1904         ip[j-fshift] = ip[j];
1905 
1906         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1907       }
1908     }
1909     ai[i] = ai[i-1] + ailen[i-1];
1910   }
1911   if (mbs) {
1912     fshift += imax[mbs-1] - ailen[mbs-1];
1913     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1914   }
1915 
1916   /* reset ilen and imax for each row */
1917   a->nonzerorowcnt = 0;
1918   for (i=0; i<mbs; i++) {
1919     ailen[i] = imax[i] = ai[i+1] - ai[i];
1920     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1921   }
1922   a->nz = ai[mbs];
1923 
1924   /* diagonals may have moved, so kill the diagonal pointers */
1925   a->idiagvalid = PETSC_FALSE;
1926   if (fshift && a->diag) {
1927     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1928     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1929     a->diag = 0;
1930   }
1931   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1932   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1933   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1934   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1935 
1936   A->info.mallocs    += a->reallocs;
1937   a->reallocs         = 0;
1938   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1939   a->rmax             = rmax;
1940 
1941   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1942   PetscFunctionReturn(0);
1943 }
1944 
1945 /*
1946    This function returns an array of flags which indicate the locations of contiguous
1947    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1948    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1949    Assume: sizes should be long enough to hold all the values.
1950 */
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1953 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1954 {
1955   PetscInt  i,j,k,row;
1956   PetscBool flg;
1957 
1958   PetscFunctionBegin;
1959   for (i=0,j=0; i<n; j++) {
1960     row = idx[i];
1961     if (row%bs!=0) { /* Not the begining of a block */
1962       sizes[j] = 1;
1963       i++;
1964     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1965       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1966       i++;
1967     } else { /* Begining of the block, so check if the complete block exists */
1968       flg = PETSC_TRUE;
1969       for (k=1; k<bs; k++) {
1970         if (row+k != idx[i+k]) { /* break in the block */
1971           flg = PETSC_FALSE;
1972           break;
1973         }
1974       }
1975       if (flg) { /* No break in the bs */
1976         sizes[j] = bs;
1977         i       += bs;
1978       } else {
1979         sizes[j] = 1;
1980         i++;
1981       }
1982     }
1983   }
1984   *bs_max = j;
1985   PetscFunctionReturn(0);
1986 }
1987 
1988 #undef __FUNCT__
1989 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1990 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1991 {
1992   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1993   PetscErrorCode    ierr;
1994   PetscInt          i,j,k,count,*rows;
1995   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1996   PetscScalar       zero = 0.0;
1997   MatScalar         *aa;
1998   const PetscScalar *xx;
1999   PetscScalar       *bb;
2000 
2001   PetscFunctionBegin;
2002   /* fix right hand side if needed */
2003   if (x && b) {
2004     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2005     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2006     for (i=0; i<is_n; i++) {
2007       bb[is_idx[i]] = diag*xx[is_idx[i]];
2008     }
2009     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2010     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2011   }
2012 
2013   /* Make a copy of the IS and  sort it */
2014   /* allocate memory for rows,sizes */
2015   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2016 
2017   /* copy IS values to rows, and sort them */
2018   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2019   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2020 
2021   if (baij->keepnonzeropattern) {
2022     for (i=0; i<is_n; i++) sizes[i] = 1;
2023     bs_max          = is_n;
2024   } else {
2025     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2026     A->nonzerostate++;
2027   }
2028 
2029   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2030     row = rows[j];
2031     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2032     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2033     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2034     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2035       if (diag != (PetscScalar)0.0) {
2036         if (baij->ilen[row/bs] > 0) {
2037           baij->ilen[row/bs]       = 1;
2038           baij->j[baij->i[row/bs]] = row/bs;
2039 
2040           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2041         }
2042         /* Now insert all the diagonal values for this bs */
2043         for (k=0; k<bs; k++) {
2044           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2045         }
2046       } else { /* (diag == 0.0) */
2047         baij->ilen[row/bs] = 0;
2048       } /* end (diag == 0.0) */
2049     } else { /* (sizes[i] != bs) */
2050 #if defined(PETSC_USE_DEBUG)
2051       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2052 #endif
2053       for (k=0; k<count; k++) {
2054         aa[0] =  zero;
2055         aa   += bs;
2056       }
2057       if (diag != (PetscScalar)0.0) {
2058         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2059       }
2060     }
2061   }
2062 
2063   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2064   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2065   PetscFunctionReturn(0);
2066 }
2067 
2068 #undef __FUNCT__
2069 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2070 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2071 {
2072   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2073   PetscErrorCode    ierr;
2074   PetscInt          i,j,k,count;
2075   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2076   PetscScalar       zero = 0.0;
2077   MatScalar         *aa;
2078   const PetscScalar *xx;
2079   PetscScalar       *bb;
2080   PetscBool         *zeroed,vecs = PETSC_FALSE;
2081 
2082   PetscFunctionBegin;
2083   /* fix right hand side if needed */
2084   if (x && b) {
2085     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2086     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2087     vecs = PETSC_TRUE;
2088   }
2089 
2090   /* zero the columns */
2091   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2092   for (i=0; i<is_n; i++) {
2093     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2094     zeroed[is_idx[i]] = PETSC_TRUE;
2095   }
2096   for (i=0; i<A->rmap->N; i++) {
2097     if (!zeroed[i]) {
2098       row = i/bs;
2099       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2100         for (k=0; k<bs; k++) {
2101           col = bs*baij->j[j] + k;
2102           if (zeroed[col]) {
2103             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2104             if (vecs) bb[i] -= aa[0]*xx[col];
2105             aa[0] = 0.0;
2106           }
2107         }
2108       }
2109     } else if (vecs) bb[i] = diag*xx[i];
2110   }
2111   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2112   if (vecs) {
2113     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2114     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2115   }
2116 
2117   /* zero the rows */
2118   for (i=0; i<is_n; i++) {
2119     row   = is_idx[i];
2120     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2121     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2122     for (k=0; k<count; k++) {
2123       aa[0] =  zero;
2124       aa   += bs;
2125     }
2126     if (diag != (PetscScalar)0.0) {
2127       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2128     }
2129   }
2130   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2136 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2137 {
2138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2139   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2140   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2141   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2142   PetscErrorCode ierr;
2143   PetscInt       ridx,cidx,bs2=a->bs2;
2144   PetscBool      roworiented=a->roworiented;
2145   MatScalar      *ap,value,*aa=a->a,*bap;
2146 
2147   PetscFunctionBegin;
2148   for (k=0; k<m; k++) { /* loop over added rows */
2149     row  = im[k];
2150     brow = row/bs;
2151     if (row < 0) continue;
2152 #if defined(PETSC_USE_DEBUG)
2153     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2154 #endif
2155     rp   = aj + ai[brow];
2156     ap   = aa + bs2*ai[brow];
2157     rmax = imax[brow];
2158     nrow = ailen[brow];
2159     low  = 0;
2160     high = nrow;
2161     for (l=0; l<n; l++) { /* loop over added columns */
2162       if (in[l] < 0) continue;
2163 #if defined(PETSC_USE_DEBUG)
2164       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2165 #endif
2166       col  = in[l]; bcol = col/bs;
2167       ridx = row % bs; cidx = col % bs;
2168       if (roworiented) {
2169         value = v[l + k*n];
2170       } else {
2171         value = v[k + l*m];
2172       }
2173       if (col <= lastcol) low = 0; else high = nrow;
2174       lastcol = col;
2175       while (high-low > 7) {
2176         t = (low+high)/2;
2177         if (rp[t] > bcol) high = t;
2178         else              low  = t;
2179       }
2180       for (i=low; i<high; i++) {
2181         if (rp[i] > bcol) break;
2182         if (rp[i] == bcol) {
2183           bap = ap +  bs2*i + bs*cidx + ridx;
2184           if (is == ADD_VALUES) *bap += value;
2185           else                  *bap  = value;
2186           goto noinsert1;
2187         }
2188       }
2189       if (nonew == 1) goto noinsert1;
2190       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2191       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2192       N = nrow++ - 1; high++;
2193       /* shift up all the later entries in this row */
2194       for (ii=N; ii>=i; ii--) {
2195         rp[ii+1] = rp[ii];
2196         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2197       }
2198       if (N>=i) {
2199         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2200       }
2201       rp[i]                      = bcol;
2202       ap[bs2*i + bs*cidx + ridx] = value;
2203       a->nz++;
2204       A->nonzerostate++;
2205 noinsert1:;
2206       low = i;
2207     }
2208     ailen[brow] = nrow;
2209   }
2210   PetscFunctionReturn(0);
2211 }
2212 
2213 #undef __FUNCT__
2214 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2215 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2216 {
2217   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2218   Mat            outA;
2219   PetscErrorCode ierr;
2220   PetscBool      row_identity,col_identity;
2221 
2222   PetscFunctionBegin;
2223   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2224   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2225   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2226   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2227 
2228   outA            = inA;
2229   inA->factortype = MAT_FACTOR_LU;
2230 
2231   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2232 
2233   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2234   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2235   a->row = row;
2236   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2237   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2238   a->col = col;
2239 
2240   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2241   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2242   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2243   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2244 
2245   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2246   if (!a->solve_work) {
2247     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2248     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2249   }
2250   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2251   PetscFunctionReturn(0);
2252 }
2253 
2254 #undef __FUNCT__
2255 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2256 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2257 {
2258   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2259   PetscInt    i,nz,mbs;
2260 
2261   PetscFunctionBegin;
2262   nz  = baij->maxnz;
2263   mbs = baij->mbs;
2264   for (i=0; i<nz; i++) {
2265     baij->j[i] = indices[i];
2266   }
2267   baij->nz = nz;
2268   for (i=0; i<mbs; i++) {
2269     baij->ilen[i] = baij->imax[i];
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 #undef __FUNCT__
2275 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2276 /*@
2277     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2278        in the matrix.
2279 
2280   Input Parameters:
2281 +  mat - the SeqBAIJ matrix
2282 -  indices - the column indices
2283 
2284   Level: advanced
2285 
2286   Notes:
2287     This can be called if you have precomputed the nonzero structure of the
2288   matrix and want to provide it to the matrix object to improve the performance
2289   of the MatSetValues() operation.
2290 
2291     You MUST have set the correct numbers of nonzeros per row in the call to
2292   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2293 
2294     MUST be called before any calls to MatSetValues();
2295 
2296 @*/
2297 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2298 {
2299   PetscErrorCode ierr;
2300 
2301   PetscFunctionBegin;
2302   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2303   PetscValidPointer(indices,2);
2304   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2305   PetscFunctionReturn(0);
2306 }
2307 
2308 #undef __FUNCT__
2309 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2310 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2311 {
2312   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2313   PetscErrorCode ierr;
2314   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2315   PetscReal      atmp;
2316   PetscScalar    *x,zero = 0.0;
2317   MatScalar      *aa;
2318   PetscInt       ncols,brow,krow,kcol;
2319 
2320   PetscFunctionBegin;
2321   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2322   bs  = A->rmap->bs;
2323   aa  = a->a;
2324   ai  = a->i;
2325   aj  = a->j;
2326   mbs = a->mbs;
2327 
2328   ierr = VecSet(v,zero);CHKERRQ(ierr);
2329   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2330   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2331   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2332   for (i=0; i<mbs; i++) {
2333     ncols = ai[1] - ai[0]; ai++;
2334     brow  = bs*i;
2335     for (j=0; j<ncols; j++) {
2336       for (kcol=0; kcol<bs; kcol++) {
2337         for (krow=0; krow<bs; krow++) {
2338           atmp = PetscAbsScalar(*aa);aa++;
2339           row  = brow + krow;   /* row index */
2340           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2341         }
2342       }
2343       aj++;
2344     }
2345   }
2346   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2347   PetscFunctionReturn(0);
2348 }
2349 
2350 #undef __FUNCT__
2351 #define __FUNCT__ "MatCopy_SeqBAIJ"
2352 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2353 {
2354   PetscErrorCode ierr;
2355 
2356   PetscFunctionBegin;
2357   /* If the two matrices have the same copy implementation, use fast copy. */
2358   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2359     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2360     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2361     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2362 
2363     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2364     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2365     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2366   } else {
2367     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2368   }
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2374 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2375 {
2376   PetscErrorCode ierr;
2377 
2378   PetscFunctionBegin;
2379   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2380   PetscFunctionReturn(0);
2381 }
2382 
2383 #undef __FUNCT__
2384 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2385 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2386 {
2387   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2388 
2389   PetscFunctionBegin;
2390   *array = a->a;
2391   PetscFunctionReturn(0);
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2396 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2397 {
2398   PetscFunctionBegin;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatAXPYGetPreallocation_SeqBAIJ"
2404 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2405 {
2406   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2407   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2408   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2409   PetscErrorCode ierr;
2410 
2411   PetscFunctionBegin;
2412   /* Set the number of nonzeros in the new matrix */
2413   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2419 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2420 {
2421   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2422   PetscErrorCode ierr;
2423   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2424   PetscBLASInt   one=1;
2425 
2426   PetscFunctionBegin;
2427   if (str == SAME_NONZERO_PATTERN) {
2428     PetscScalar  alpha = a;
2429     PetscBLASInt bnz;
2430     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2431     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2432     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2433   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2434     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2435   } else {
2436     Mat      B;
2437     PetscInt *nnz;
2438     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2439     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2440     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2441     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2442     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2443     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2444     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2445     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2446     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2447     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2448     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
2449     ierr = PetscFree(nnz);CHKERRQ(ierr);
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454 #undef __FUNCT__
2455 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2457 {
2458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2459   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2460   MatScalar   *aa = a->a;
2461 
2462   PetscFunctionBegin;
2463   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 #undef __FUNCT__
2481 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2482 /*
2483     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2484 */
2485 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2486 {
2487   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2488   PetscErrorCode ierr;
2489   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2490   PetscInt       nz = a->i[m],row,*jj,mr,col;
2491 
2492   PetscFunctionBegin;
2493   *nn = n;
2494   if (!ia) PetscFunctionReturn(0);
2495   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2496   else {
2497     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2498     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2499     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2500     jj   = a->j;
2501     for (i=0; i<nz; i++) {
2502       collengths[jj[i]]++;
2503     }
2504     cia[0] = oshift;
2505     for (i=0; i<n; i++) {
2506       cia[i+1] = cia[i] + collengths[i];
2507     }
2508     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2509     jj   = a->j;
2510     for (row=0; row<m; row++) {
2511       mr = a->i[row+1] - a->i[row];
2512       for (i=0; i<mr; i++) {
2513         col = *jj++;
2514 
2515         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2516       }
2517     }
2518     ierr = PetscFree(collengths);CHKERRQ(ierr);
2519     *ia  = cia; *ja = cja;
2520   }
2521   PetscFunctionReturn(0);
2522 }
2523 
2524 #undef __FUNCT__
2525 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2526 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2527 {
2528   PetscErrorCode ierr;
2529 
2530   PetscFunctionBegin;
2531   if (!ia) PetscFunctionReturn(0);
2532   ierr = PetscFree(*ia);CHKERRQ(ierr);
2533   ierr = PetscFree(*ja);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 /*
2538  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2539  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2540  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2541  */
2542 #undef __FUNCT__
2543 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2544 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2545 {
2546   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2547   PetscErrorCode ierr;
2548   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2549   PetscInt       nz = a->i[m],row,*jj,mr,col;
2550   PetscInt       *cspidx;
2551 
2552   PetscFunctionBegin;
2553   *nn = n;
2554   if (!ia) PetscFunctionReturn(0);
2555 
2556   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2557   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2558   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2559   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2560   jj   = a->j;
2561   for (i=0; i<nz; i++) {
2562     collengths[jj[i]]++;
2563   }
2564   cia[0] = oshift;
2565   for (i=0; i<n; i++) {
2566     cia[i+1] = cia[i] + collengths[i];
2567   }
2568   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2569   jj   = a->j;
2570   for (row=0; row<m; row++) {
2571     mr = a->i[row+1] - a->i[row];
2572     for (i=0; i<mr; i++) {
2573       col = *jj++;
2574       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2575       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2576     }
2577   }
2578   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2579   *ia    = cia; *ja = cja;
2580   *spidx = cspidx;
2581   PetscFunctionReturn(0);
2582 }
2583 
2584 #undef __FUNCT__
2585 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2586 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2587 {
2588   PetscErrorCode ierr;
2589 
2590   PetscFunctionBegin;
2591   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2592   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2593   PetscFunctionReturn(0);
2594 }
2595 
2596 #undef __FUNCT__
2597 #define __FUNCT__ "MatShift_SeqBAIJ"
2598 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2599 {
2600   PetscErrorCode ierr;
2601   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2602 
2603   PetscFunctionBegin;
2604   if (!aij->nz) {
2605     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2606   }
2607   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2608   PetscFunctionReturn(0);
2609 }
2610 
2611 /* -------------------------------------------------------------------*/
2612 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2613                                        MatGetRow_SeqBAIJ,
2614                                        MatRestoreRow_SeqBAIJ,
2615                                        MatMult_SeqBAIJ_N,
2616                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2617                                        MatMultTranspose_SeqBAIJ,
2618                                        MatMultTransposeAdd_SeqBAIJ,
2619                                        0,
2620                                        0,
2621                                        0,
2622                                /* 10*/ 0,
2623                                        MatLUFactor_SeqBAIJ,
2624                                        0,
2625                                        0,
2626                                        MatTranspose_SeqBAIJ,
2627                                /* 15*/ MatGetInfo_SeqBAIJ,
2628                                        MatEqual_SeqBAIJ,
2629                                        MatGetDiagonal_SeqBAIJ,
2630                                        MatDiagonalScale_SeqBAIJ,
2631                                        MatNorm_SeqBAIJ,
2632                                /* 20*/ 0,
2633                                        MatAssemblyEnd_SeqBAIJ,
2634                                        MatSetOption_SeqBAIJ,
2635                                        MatZeroEntries_SeqBAIJ,
2636                                /* 24*/ MatZeroRows_SeqBAIJ,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                        0,
2641                                /* 29*/ MatSetUp_SeqBAIJ,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                /* 34*/ MatDuplicate_SeqBAIJ,
2647                                        0,
2648                                        0,
2649                                        MatILUFactor_SeqBAIJ,
2650                                        0,
2651                                /* 39*/ MatAXPY_SeqBAIJ,
2652                                        MatGetSubMatrices_SeqBAIJ,
2653                                        MatIncreaseOverlap_SeqBAIJ,
2654                                        MatGetValues_SeqBAIJ,
2655                                        MatCopy_SeqBAIJ,
2656                                /* 44*/ 0,
2657                                        MatScale_SeqBAIJ,
2658                                        MatShift_SeqBAIJ,
2659                                        0,
2660                                        MatZeroRowsColumns_SeqBAIJ,
2661                                /* 49*/ 0,
2662                                        MatGetRowIJ_SeqBAIJ,
2663                                        MatRestoreRowIJ_SeqBAIJ,
2664                                        MatGetColumnIJ_SeqBAIJ,
2665                                        MatRestoreColumnIJ_SeqBAIJ,
2666                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        MatSetValuesBlocked_SeqBAIJ,
2671                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2672                                        MatDestroy_SeqBAIJ,
2673                                        MatView_SeqBAIJ,
2674                                        0,
2675                                        0,
2676                                /* 64*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2682                                        0,
2683                                        MatConvert_Basic,
2684                                        0,
2685                                        0,
2686                                /* 74*/ 0,
2687                                        MatFDColoringApply_BAIJ,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                /* 79*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        MatLoad_SeqBAIJ,
2696                                /* 84*/ 0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                /* 89*/ 0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                /* 94*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                /* 99*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*104*/ 0,
2717                                        MatRealPart_SeqBAIJ,
2718                                        MatImaginaryPart_SeqBAIJ,
2719                                        0,
2720                                        0,
2721                                /*109*/ 0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                        MatMissingDiagonal_SeqBAIJ,
2726                                /*114*/ 0,
2727                                        0,
2728                                        0,
2729                                        0,
2730                                        0,
2731                                /*119*/ 0,
2732                                        0,
2733                                        MatMultHermitianTranspose_SeqBAIJ,
2734                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2735                                        0,
2736                                /*124*/ 0,
2737                                        0,
2738                                        MatInvertBlockDiagonal_SeqBAIJ,
2739                                        0,
2740                                        0,
2741                                /*129*/ 0,
2742                                        0,
2743                                        0,
2744                                        0,
2745                                        0,
2746                                /*134*/ 0,
2747                                        0,
2748                                        0,
2749                                        0,
2750                                        0,
2751                                /*139*/ 0,
2752                                        0,
2753                                        0,
2754                                        MatFDColoringSetUp_SeqXAIJ,
2755                                        0,
2756                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ
2757 };
2758 
2759 #undef __FUNCT__
2760 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2761 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2762 {
2763   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2764   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2765   PetscErrorCode ierr;
2766 
2767   PetscFunctionBegin;
2768   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2769 
2770   /* allocate space for values if not already there */
2771   if (!aij->saved_values) {
2772     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2773     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2774   }
2775 
2776   /* copy values over */
2777   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2778   PetscFunctionReturn(0);
2779 }
2780 
2781 #undef __FUNCT__
2782 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2783 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2784 {
2785   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2786   PetscErrorCode ierr;
2787   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2788 
2789   PetscFunctionBegin;
2790   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2791   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2792 
2793   /* copy values over */
2794   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2795   PetscFunctionReturn(0);
2796 }
2797 
2798 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2799 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2800 
2801 #undef __FUNCT__
2802 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2803 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2804 {
2805   Mat_SeqBAIJ    *b;
2806   PetscErrorCode ierr;
2807   PetscInt       i,mbs,nbs,bs2;
2808   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2809 
2810   PetscFunctionBegin;
2811   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2812   if (nz == MAT_SKIP_ALLOCATION) {
2813     skipallocation = PETSC_TRUE;
2814     nz             = 0;
2815   }
2816 
2817   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2818   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2819   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2820   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2821 
2822   B->preallocated = PETSC_TRUE;
2823 
2824   mbs = B->rmap->n/bs;
2825   nbs = B->cmap->n/bs;
2826   bs2 = bs*bs;
2827 
2828   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2829 
2830   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2831   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2832   if (nnz) {
2833     for (i=0; i<mbs; i++) {
2834       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2835       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2836     }
2837   }
2838 
2839   b    = (Mat_SeqBAIJ*)B->data;
2840   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2841   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2842   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2843 
2844   if (!flg) {
2845     switch (bs) {
2846     case 1:
2847       B->ops->mult    = MatMult_SeqBAIJ_1;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2849       break;
2850     case 2:
2851       B->ops->mult    = MatMult_SeqBAIJ_2;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2853       break;
2854     case 3:
2855       B->ops->mult    = MatMult_SeqBAIJ_3;
2856       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2857       break;
2858     case 4:
2859       B->ops->mult    = MatMult_SeqBAIJ_4;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2861       break;
2862     case 5:
2863       B->ops->mult    = MatMult_SeqBAIJ_5;
2864       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2865       break;
2866     case 6:
2867       B->ops->mult    = MatMult_SeqBAIJ_6;
2868       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2869       break;
2870     case 7:
2871       B->ops->mult    = MatMult_SeqBAIJ_7;
2872       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2873       break;
2874     case 15:
2875       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2876       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2877       break;
2878     default:
2879       B->ops->mult    = MatMult_SeqBAIJ_N;
2880       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2881       break;
2882     }
2883   }
2884   B->ops->sor = MatSOR_SeqBAIJ;
2885   b->mbs = mbs;
2886   b->nbs = nbs;
2887   if (!skipallocation) {
2888     if (!b->imax) {
2889       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2890       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2891 
2892       b->free_imax_ilen = PETSC_TRUE;
2893     }
2894     /* b->ilen will count nonzeros in each block row so far. */
2895     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2896     if (!nnz) {
2897       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2898       else if (nz < 0) nz = 1;
2899       for (i=0; i<mbs; i++) b->imax[i] = nz;
2900       nz = nz*mbs;
2901     } else {
2902       nz = 0;
2903       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2904     }
2905 
2906     /* allocate the matrix space */
2907     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2908     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2909     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2910     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2911     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2912 
2913     b->singlemalloc = PETSC_TRUE;
2914     b->i[0]         = 0;
2915     for (i=1; i<mbs+1; i++) {
2916       b->i[i] = b->i[i-1] + b->imax[i-1];
2917     }
2918     b->free_a  = PETSC_TRUE;
2919     b->free_ij = PETSC_TRUE;
2920   } else {
2921     b->free_a  = PETSC_FALSE;
2922     b->free_ij = PETSC_FALSE;
2923   }
2924 
2925   b->bs2              = bs2;
2926   b->mbs              = mbs;
2927   b->nz               = 0;
2928   b->maxnz            = nz;
2929   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2930   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2931   PetscFunctionReturn(0);
2932 }
2933 
2934 #undef __FUNCT__
2935 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2936 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2937 {
2938   PetscInt       i,m,nz,nz_max=0,*nnz;
2939   PetscScalar    *values=0;
2940   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2941   PetscErrorCode ierr;
2942 
2943   PetscFunctionBegin;
2944   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2945   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2946   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2947   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2948   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2949   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2950   m    = B->rmap->n/bs;
2951 
2952   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2953   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2954   for (i=0; i<m; i++) {
2955     nz = ii[i+1]- ii[i];
2956     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2957     nz_max = PetscMax(nz_max, nz);
2958     nnz[i] = nz;
2959   }
2960   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2961   ierr = PetscFree(nnz);CHKERRQ(ierr);
2962 
2963   values = (PetscScalar*)V;
2964   if (!values) {
2965     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2966   }
2967   for (i=0; i<m; i++) {
2968     PetscInt          ncols  = ii[i+1] - ii[i];
2969     const PetscInt    *icols = jj + ii[i];
2970     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2971     if (!roworiented) {
2972       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2973     } else {
2974       PetscInt j;
2975       for (j=0; j<ncols; j++) {
2976         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2977         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2978       }
2979     }
2980   }
2981   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2982   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2983   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2984   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2985   PetscFunctionReturn(0);
2986 }
2987 
2988 /*MC
2989    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2990    block sparse compressed row format.
2991 
2992    Options Database Keys:
2993 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2994 
2995   Level: beginner
2996 
2997 .seealso: MatCreateSeqBAIJ()
2998 M*/
2999 
3000 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3001 
3002 #undef __FUNCT__
3003 #define __FUNCT__ "MatCreate_SeqBAIJ"
3004 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3005 {
3006   PetscErrorCode ierr;
3007   PetscMPIInt    size;
3008   Mat_SeqBAIJ    *b;
3009 
3010   PetscFunctionBegin;
3011   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3012   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3013 
3014   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3015   B->data = (void*)b;
3016   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3017 
3018   b->row          = 0;
3019   b->col          = 0;
3020   b->icol         = 0;
3021   b->reallocs     = 0;
3022   b->saved_values = 0;
3023 
3024   b->roworiented        = PETSC_TRUE;
3025   b->nonew              = 0;
3026   b->diag               = 0;
3027   B->spptr              = 0;
3028   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3029   b->keepnonzeropattern = PETSC_FALSE;
3030 
3031   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3032   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3033   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3034   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3035   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3036   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3037   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3042   PetscFunctionReturn(0);
3043 }
3044 
3045 #undef __FUNCT__
3046 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3047 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3048 {
3049   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3050   PetscErrorCode ierr;
3051   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3052 
3053   PetscFunctionBegin;
3054   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3055 
3056   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3057     c->imax           = a->imax;
3058     c->ilen           = a->ilen;
3059     c->free_imax_ilen = PETSC_FALSE;
3060   } else {
3061     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3062     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3063     for (i=0; i<mbs; i++) {
3064       c->imax[i] = a->imax[i];
3065       c->ilen[i] = a->ilen[i];
3066     }
3067     c->free_imax_ilen = PETSC_TRUE;
3068   }
3069 
3070   /* allocate the matrix space */
3071   if (mallocmatspace) {
3072     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3073       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3074       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3075 
3076       c->i            = a->i;
3077       c->j            = a->j;
3078       c->singlemalloc = PETSC_FALSE;
3079       c->free_a       = PETSC_TRUE;
3080       c->free_ij      = PETSC_FALSE;
3081       c->parent       = A;
3082       C->preallocated = PETSC_TRUE;
3083       C->assembled    = PETSC_TRUE;
3084 
3085       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3086       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3087       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3088     } else {
3089       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3090       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3091 
3092       c->singlemalloc = PETSC_TRUE;
3093       c->free_a       = PETSC_TRUE;
3094       c->free_ij      = PETSC_TRUE;
3095 
3096       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3097       if (mbs > 0) {
3098         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3099         if (cpvalues == MAT_COPY_VALUES) {
3100           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3101         } else {
3102           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3103         }
3104       }
3105       C->preallocated = PETSC_TRUE;
3106       C->assembled    = PETSC_TRUE;
3107     }
3108   }
3109 
3110   c->roworiented = a->roworiented;
3111   c->nonew       = a->nonew;
3112 
3113   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3114   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3115 
3116   c->bs2         = a->bs2;
3117   c->mbs         = a->mbs;
3118   c->nbs         = a->nbs;
3119 
3120   if (a->diag) {
3121     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3122       c->diag      = a->diag;
3123       c->free_diag = PETSC_FALSE;
3124     } else {
3125       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3126       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3127       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3128       c->free_diag = PETSC_TRUE;
3129     }
3130   } else c->diag = 0;
3131 
3132   c->nz         = a->nz;
3133   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3134   c->solve_work = NULL;
3135   c->mult_work  = NULL;
3136   c->sor_workt  = NULL;
3137   c->sor_work   = NULL;
3138 
3139   c->compressedrow.use   = a->compressedrow.use;
3140   c->compressedrow.nrows = a->compressedrow.nrows;
3141   if (a->compressedrow.use) {
3142     i    = a->compressedrow.nrows;
3143     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3144     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3145     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3146     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3147   } else {
3148     c->compressedrow.use    = PETSC_FALSE;
3149     c->compressedrow.i      = NULL;
3150     c->compressedrow.rindex = NULL;
3151   }
3152   C->nonzerostate = A->nonzerostate;
3153 
3154   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3155   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3156   PetscFunctionReturn(0);
3157 }
3158 
3159 #undef __FUNCT__
3160 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3161 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3162 {
3163   PetscErrorCode ierr;
3164 
3165   PetscFunctionBegin;
3166   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3167   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3168   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3169   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3170   PetscFunctionReturn(0);
3171 }
3172 
3173 #undef __FUNCT__
3174 #define __FUNCT__ "MatLoad_SeqBAIJ"
3175 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3176 {
3177   Mat_SeqBAIJ    *a;
3178   PetscErrorCode ierr;
3179   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3180   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3181   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3182   PetscInt       *masked,nmask,tmp,bs2,ishift;
3183   PetscMPIInt    size;
3184   int            fd;
3185   PetscScalar    *aa;
3186   MPI_Comm       comm;
3187 
3188   PetscFunctionBegin;
3189   /* force binary viewer to load .info file if it has not yet done so */
3190   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3191   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3192   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3193   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3194   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3195   if (bs < 0) bs = 1;
3196   bs2  = bs*bs;
3197 
3198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3199   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3200   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3201   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3202   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3203   M = header[1]; N = header[2]; nz = header[3];
3204 
3205   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3206   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3207 
3208   /*
3209      This code adds extra rows to make sure the number of rows is
3210     divisible by the blocksize
3211   */
3212   mbs        = M/bs;
3213   extra_rows = bs - M + bs*(mbs);
3214   if (extra_rows == bs) extra_rows = 0;
3215   else mbs++;
3216   if (extra_rows) {
3217     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3218   }
3219 
3220   /* Set global sizes if not already set */
3221   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3222     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3223   } else { /* Check if the matrix global sizes are correct */
3224     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3225     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3226       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3227     }
3228     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3229   }
3230 
3231   /* read in row lengths */
3232   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3233   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3234   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3235 
3236   /* read in column indices */
3237   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3238   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3239   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3240 
3241   /* loop over row lengths determining block row lengths */
3242   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3243   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3244   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3245   rowcount = 0;
3246   nzcount  = 0;
3247   for (i=0; i<mbs; i++) {
3248     nmask = 0;
3249     for (j=0; j<bs; j++) {
3250       kmax = rowlengths[rowcount];
3251       for (k=0; k<kmax; k++) {
3252         tmp = jj[nzcount++]/bs;
3253         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3254       }
3255       rowcount++;
3256     }
3257     browlengths[i] += nmask;
3258     /* zero out the mask elements we set */
3259     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3260   }
3261 
3262   /* Do preallocation  */
3263   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3264   a    = (Mat_SeqBAIJ*)newmat->data;
3265 
3266   /* set matrix "i" values */
3267   a->i[0] = 0;
3268   for (i=1; i<= mbs; i++) {
3269     a->i[i]      = a->i[i-1] + browlengths[i-1];
3270     a->ilen[i-1] = browlengths[i-1];
3271   }
3272   a->nz = 0;
3273   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3274 
3275   /* read in nonzero values */
3276   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3277   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3278   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3279 
3280   /* set "a" and "j" values into matrix */
3281   nzcount = 0; jcount = 0;
3282   for (i=0; i<mbs; i++) {
3283     nzcountb = nzcount;
3284     nmask    = 0;
3285     for (j=0; j<bs; j++) {
3286       kmax = rowlengths[i*bs+j];
3287       for (k=0; k<kmax; k++) {
3288         tmp = jj[nzcount++]/bs;
3289         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3290       }
3291     }
3292     /* sort the masked values */
3293     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3294 
3295     /* set "j" values into matrix */
3296     maskcount = 1;
3297     for (j=0; j<nmask; j++) {
3298       a->j[jcount++]  = masked[j];
3299       mask[masked[j]] = maskcount++;
3300     }
3301     /* set "a" values into matrix */
3302     ishift = bs2*a->i[i];
3303     for (j=0; j<bs; j++) {
3304       kmax = rowlengths[i*bs+j];
3305       for (k=0; k<kmax; k++) {
3306         tmp       = jj[nzcountb]/bs;
3307         block     = mask[tmp] - 1;
3308         point     = jj[nzcountb] - bs*tmp;
3309         idx       = ishift + bs2*block + j + bs*point;
3310         a->a[idx] = (MatScalar)aa[nzcountb++];
3311       }
3312     }
3313     /* zero out the mask elements we set */
3314     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3315   }
3316   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3317 
3318   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3319   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3320   ierr = PetscFree(aa);CHKERRQ(ierr);
3321   ierr = PetscFree(jj);CHKERRQ(ierr);
3322   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3323 
3324   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3325   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3326   PetscFunctionReturn(0);
3327 }
3328 
3329 #undef __FUNCT__
3330 #define __FUNCT__ "MatCreateSeqBAIJ"
3331 /*@C
3332    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3333    compressed row) format.  For good matrix assembly performance the
3334    user should preallocate the matrix storage by setting the parameter nz
3335    (or the array nnz).  By setting these parameters accurately, performance
3336    during matrix assembly can be increased by more than a factor of 50.
3337 
3338    Collective on MPI_Comm
3339 
3340    Input Parameters:
3341 +  comm - MPI communicator, set to PETSC_COMM_SELF
3342 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3343           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3344 .  m - number of rows
3345 .  n - number of columns
3346 .  nz - number of nonzero blocks  per block row (same for all rows)
3347 -  nnz - array containing the number of nonzero blocks in the various block rows
3348          (possibly different for each block row) or NULL
3349 
3350    Output Parameter:
3351 .  A - the matrix
3352 
3353    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3354    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3355    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3356 
3357    Options Database Keys:
3358 .   -mat_no_unroll - uses code that does not unroll the loops in the
3359                      block calculations (much slower)
3360 .    -mat_block_size - size of the blocks to use
3361 
3362    Level: intermediate
3363 
3364    Notes:
3365    The number of rows and columns must be divisible by blocksize.
3366 
3367    If the nnz parameter is given then the nz parameter is ignored
3368 
3369    A nonzero block is any block that as 1 or more nonzeros in it
3370 
3371    The block AIJ format is fully compatible with standard Fortran 77
3372    storage.  That is, the stored row and column indices can begin at
3373    either one (as in Fortran) or zero.  See the users' manual for details.
3374 
3375    Specify the preallocated storage with either nz or nnz (not both).
3376    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3377    allocation.  See Users-Manual: ch_mat for details.
3378    matrices.
3379 
3380 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3381 @*/
3382 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3383 {
3384   PetscErrorCode ierr;
3385 
3386   PetscFunctionBegin;
3387   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3388   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3389   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3390   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3391   PetscFunctionReturn(0);
3392 }
3393 
3394 #undef __FUNCT__
3395 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3396 /*@C
3397    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3398    per row in the matrix. For good matrix assembly performance the
3399    user should preallocate the matrix storage by setting the parameter nz
3400    (or the array nnz).  By setting these parameters accurately, performance
3401    during matrix assembly can be increased by more than a factor of 50.
3402 
3403    Collective on MPI_Comm
3404 
3405    Input Parameters:
3406 +  B - the matrix
3407 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3408           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3409 .  nz - number of block nonzeros per block row (same for all rows)
3410 -  nnz - array containing the number of block nonzeros in the various block rows
3411          (possibly different for each block row) or NULL
3412 
3413    Options Database Keys:
3414 .   -mat_no_unroll - uses code that does not unroll the loops in the
3415                      block calculations (much slower)
3416 .    -mat_block_size - size of the blocks to use
3417 
3418    Level: intermediate
3419 
3420    Notes:
3421    If the nnz parameter is given then the nz parameter is ignored
3422 
3423    You can call MatGetInfo() to get information on how effective the preallocation was;
3424    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3425    You can also run with the option -info and look for messages with the string
3426    malloc in them to see if additional memory allocation was needed.
3427 
3428    The block AIJ format is fully compatible with standard Fortran 77
3429    storage.  That is, the stored row and column indices can begin at
3430    either one (as in Fortran) or zero.  See the users' manual for details.
3431 
3432    Specify the preallocated storage with either nz or nnz (not both).
3433    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3434    allocation.  See Users-Manual: ch_mat for details.
3435 
3436 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3437 @*/
3438 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3439 {
3440   PetscErrorCode ierr;
3441 
3442   PetscFunctionBegin;
3443   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3444   PetscValidType(B,1);
3445   PetscValidLogicalCollectiveInt(B,bs,2);
3446   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3447   PetscFunctionReturn(0);
3448 }
3449 
3450 #undef __FUNCT__
3451 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3452 /*@C
3453    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3454    (the default sequential PETSc format).
3455 
3456    Collective on MPI_Comm
3457 
3458    Input Parameters:
3459 +  B - the matrix
3460 .  i - the indices into j for the start of each local row (starts with zero)
3461 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3462 -  v - optional values in the matrix
3463 
3464    Level: developer
3465 
3466    Notes:
3467    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3468    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3469    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3470    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3471    block column and the second index is over columns within a block.
3472 
3473 .keywords: matrix, aij, compressed row, sparse
3474 
3475 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3476 @*/
3477 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3478 {
3479   PetscErrorCode ierr;
3480 
3481   PetscFunctionBegin;
3482   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3483   PetscValidType(B,1);
3484   PetscValidLogicalCollectiveInt(B,bs,2);
3485   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3486   PetscFunctionReturn(0);
3487 }
3488 
3489 
3490 #undef __FUNCT__
3491 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3492 /*@
3493      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3494 
3495      Collective on MPI_Comm
3496 
3497    Input Parameters:
3498 +  comm - must be an MPI communicator of size 1
3499 .  bs - size of block
3500 .  m - number of rows
3501 .  n - number of columns
3502 .  i - row indices
3503 .  j - column indices
3504 -  a - matrix values
3505 
3506    Output Parameter:
3507 .  mat - the matrix
3508 
3509    Level: advanced
3510 
3511    Notes:
3512        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3513     once the matrix is destroyed
3514 
3515        You cannot set new nonzero locations into this matrix, that will generate an error.
3516 
3517        The i and j indices are 0 based
3518 
3519        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3520 
3521       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3522       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3523       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3524       with column-major ordering within blocks.
3525 
3526 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3527 
3528 @*/
3529 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3530 {
3531   PetscErrorCode ierr;
3532   PetscInt       ii;
3533   Mat_SeqBAIJ    *baij;
3534 
3535   PetscFunctionBegin;
3536   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3537   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3538 
3539   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3540   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3541   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3542   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3543   baij = (Mat_SeqBAIJ*)(*mat)->data;
3544   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3545   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3546 
3547   baij->i = i;
3548   baij->j = j;
3549   baij->a = a;
3550 
3551   baij->singlemalloc = PETSC_FALSE;
3552   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3553   baij->free_a       = PETSC_FALSE;
3554   baij->free_ij      = PETSC_FALSE;
3555 
3556   for (ii=0; ii<m; ii++) {
3557     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3558 #if defined(PETSC_USE_DEBUG)
3559     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3560 #endif
3561   }
3562 #if defined(PETSC_USE_DEBUG)
3563   for (ii=0; ii<baij->i[m]; ii++) {
3564     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3565     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3566   }
3567 #endif
3568 
3569   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3570   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3571   PetscFunctionReturn(0);
3572 }
3573 
3574 #undef __FUNCT__
3575 #define __FUNCT__ "MatCreateMPIMatConcatenateSeqMat_SeqBAIJ"
3576 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3577 {
3578   PetscErrorCode ierr;
3579 
3580   PetscFunctionBegin;
3581   ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3582   PetscFunctionReturn(0);
3583 }
3584