xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 7c8652dd9fb051dfaf30896d504f41ba028df3ea)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc-private/kernels/blockinvert.h>
9 #include <petsc-private/kernels/blockmatmult.h>
10 
11 #undef __FUNCT__
12 #define __FUNCT__ "MatInvertBlockDiagonal_SeqBAIJ"
13 PetscErrorCode  MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
14 {
15   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
16   PetscErrorCode ierr;
17   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
18   MatScalar      *v    = a->a,*odiag,*diag,*mdiag,work[25],*v_work;
19   PetscReal      shift = 0.0;
20 
21   PetscFunctionBegin;
22   if (a->idiagvalid) {
23     if (values) *values = a->idiag;
24     PetscFunctionReturn(0);
25   }
26   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
27   diag_offset = a->diag;
28   if (!a->idiag) {
29     ierr = PetscMalloc1(2*bs2*mbs,&a->idiag);CHKERRQ(ierr);
30     ierr = PetscLogObjectMemory((PetscObject)A,2*bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
31   }
32   diag  = a->idiag;
33   mdiag = a->idiag+bs2*mbs;
34   if (values) *values = a->idiag;
35   /* factor and invert each block */
36   switch (bs) {
37   case 1:
38     for (i=0; i<mbs; i++) {
39       odiag    = v + 1*diag_offset[i];
40       diag[0]  = odiag[0];
41       mdiag[0] = odiag[0];
42       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
43       diag    += 1;
44       mdiag   += 1;
45     }
46     break;
47   case 2:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 4*diag_offset[i];
50       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
51       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
52       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift);CHKERRQ(ierr);
53       diag    += 4;
54       mdiag   += 4;
55     }
56     break;
57   case 3:
58     for (i=0; i<mbs; i++) {
59       odiag    = v + 9*diag_offset[i];
60       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
61       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
62       diag[8]  = odiag[8];
63       mdiag[0] = odiag[0]; mdiag[1] = odiag[1]; mdiag[2] = odiag[2]; mdiag[3] = odiag[3];
64       mdiag[4] = odiag[4]; mdiag[5] = odiag[5]; mdiag[6] = odiag[6]; mdiag[7] = odiag[7];
65       mdiag[8] = odiag[8];
66       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift);CHKERRQ(ierr);
67       diag    += 9;
68       mdiag   += 9;
69     }
70     break;
71   case 4:
72     for (i=0; i<mbs; i++) {
73       odiag  = v + 16*diag_offset[i];
74       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
75       ierr   = PetscMemcpy(mdiag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
76       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift);CHKERRQ(ierr);
77       diag  += 16;
78       mdiag += 16;
79     }
80     break;
81   case 5:
82     for (i=0; i<mbs; i++) {
83       odiag  = v + 25*diag_offset[i];
84       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
85       ierr   = PetscMemcpy(mdiag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
86       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift);CHKERRQ(ierr);
87       diag  += 25;
88       mdiag += 25;
89     }
90     break;
91   case 6:
92     for (i=0; i<mbs; i++) {
93       odiag  = v + 36*diag_offset[i];
94       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
95       ierr   = PetscMemcpy(mdiag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
96       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift);CHKERRQ(ierr);
97       diag  += 36;
98       mdiag += 36;
99     }
100     break;
101   case 7:
102     for (i=0; i<mbs; i++) {
103       odiag  = v + 49*diag_offset[i];
104       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
105       ierr   = PetscMemcpy(mdiag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift);CHKERRQ(ierr);
107       diag  += 49;
108       mdiag += 49;
109     }
110     break;
111   default:
112     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
113     for (i=0; i<mbs; i++) {
114       odiag  = v + bs2*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscMemcpy(mdiag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
117       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work);CHKERRQ(ierr);
118       diag  += bs2;
119       mdiag += bs2;
120     }
121     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
122   }
123   a->idiagvalid = PETSC_TRUE;
124   PetscFunctionReturn(0);
125 }
126 
127 #undef __FUNCT__
128 #define __FUNCT__ "MatSOR_SeqBAIJ"
129 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
130 {
131   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
132   PetscScalar       *x,*work,*w,*workt,*t;
133   const MatScalar   *v,*aa = a->a, *idiag;
134   const PetscScalar *b,*xb;
135   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
136   PetscErrorCode    ierr;
137   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
138   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
139 
140   PetscFunctionBegin;
141   its = its*lits;
142   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
143   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
144   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
145   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
146   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
147 
148   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
149 
150   if (!m) PetscFunctionReturn(0);
151   diag  = a->diag;
152   idiag = a->idiag;
153   k    = PetscMax(A->rmap->n,A->cmap->n);
154   if (!a->mult_work) {
155     ierr = PetscMalloc1((2*k+1),&a->mult_work);CHKERRQ(ierr);
156   }
157   work = a->mult_work;
158   t = work + k+1;
159   if (!a->sor_work) {
160     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
161   }
162   w = a->sor_work;
163 
164   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
165   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
166 
167   if (flag & SOR_ZERO_INITIAL_GUESS) {
168     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
169       switch (bs) {
170       case 1:
171         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
172         t[0] = b[0];
173         i2     = 1;
174         idiag += 1;
175         for (i=1; i<m; i++) {
176           v  = aa + ai[i];
177           vi = aj + ai[i];
178           nz = diag[i] - ai[i];
179           s[0] = b[i2];
180           for (j=0; j<nz; j++) {
181             xw[0] = x[vi[j]];
182             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
183           }
184           t[i2] = s[0];
185           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
186           x[i2]  = xw[0];
187           idiag += 1;
188           i2    += 1;
189         }
190         break;
191       case 2:
192         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
193         t[0] = b[0]; t[1] = b[1];
194         i2     = 2;
195         idiag += 4;
196         for (i=1; i<m; i++) {
197           v  = aa + 4*ai[i];
198           vi = aj + ai[i];
199           nz = diag[i] - ai[i];
200           s[0] = b[i2]; s[1] = b[i2+1];
201           for (j=0; j<nz; j++) {
202             idx = 2*vi[j];
203             it  = 4*j;
204             xw[0] = x[idx]; xw[1] = x[1+idx];
205             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
206           }
207           t[i2] = s[0]; t[i2+1] = s[1];
208           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
209           x[i2]   = xw[0]; x[i2+1] = xw[1];
210           idiag  += 4;
211           i2     += 2;
212         }
213         break;
214       case 3:
215         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
216         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
217         i2     = 3;
218         idiag += 9;
219         for (i=1; i<m; i++) {
220           v  = aa + 9*ai[i];
221           vi = aj + ai[i];
222           nz = diag[i] - ai[i];
223           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
224           while (nz--) {
225             idx = 3*(*vi++);
226             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
227             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
228             v  += 9;
229           }
230           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
231           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
232           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
233           idiag  += 9;
234           i2     += 3;
235         }
236         break;
237       case 4:
238         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
239         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
240         i2     = 4;
241         idiag += 16;
242         for (i=1; i<m; i++) {
243           v  = aa + 16*ai[i];
244           vi = aj + ai[i];
245           nz = diag[i] - ai[i];
246           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
247           while (nz--) {
248             idx = 4*(*vi++);
249             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
250             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
251             v  += 16;
252           }
253           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
254           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
255           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
256           idiag  += 16;
257           i2     += 4;
258         }
259         break;
260       case 5:
261         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
262         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
263         i2     = 5;
264         idiag += 25;
265         for (i=1; i<m; i++) {
266           v  = aa + 25*ai[i];
267           vi = aj + ai[i];
268           nz = diag[i] - ai[i];
269           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
270           while (nz--) {
271             idx = 5*(*vi++);
272             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
273             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
274             v  += 25;
275           }
276           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
277           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
278           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
279           idiag  += 25;
280           i2     += 5;
281         }
282         break;
283       case 6:
284         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
285         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
286         i2     = 6;
287         idiag += 36;
288         for (i=1; i<m; i++) {
289           v  = aa + 36*ai[i];
290           vi = aj + ai[i];
291           nz = diag[i] - ai[i];
292           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
293           while (nz--) {
294             idx = 6*(*vi++);
295             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
296             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
297             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
298             v  += 36;
299           }
300           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
301           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
302           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
303           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
304           idiag  += 36;
305           i2     += 6;
306         }
307         break;
308       case 7:
309         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
310         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
311         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
312         i2     = 7;
313         idiag += 49;
314         for (i=1; i<m; i++) {
315           v  = aa + 49*ai[i];
316           vi = aj + ai[i];
317           nz = diag[i] - ai[i];
318           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
319           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
320           while (nz--) {
321             idx = 7*(*vi++);
322             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
323             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
324             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
325             v  += 49;
326           }
327           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
328           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
329           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
330           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
331           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
332           idiag  += 49;
333           i2     += 7;
334         }
335         break;
336       default:
337         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
338         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
339         i2     = bs;
340         idiag += bs2;
341         for (i=1; i<m; i++) {
342           v  = aa + bs2*ai[i];
343           vi = aj + ai[i];
344           nz = diag[i] - ai[i];
345 
346           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
347           /* copy all rows of x that are needed into contiguous space */
348           workt = work;
349           for (j=0; j<nz; j++) {
350             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
351             workt += bs;
352           }
353           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
354           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
355           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
356 
357           idiag += bs2;
358           i2    += bs;
359         }
360         break;
361       }
362       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
363       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
364       xb = t;
365     }
366     else xb = b;
367     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
368       idiag = a->idiag+bs2*(a->mbs-1);
369       i2 = bs * (m-1);
370       switch (bs) {
371       case 1:
372         s[0]  = xb[i2];
373         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
374         x[i2] = xw[0];
375         i2   -= 1;
376         for (i=m-2; i>=0; i--) {
377           v  = aa + (diag[i]+1);
378           vi = aj + diag[i] + 1;
379           nz = ai[i+1] - diag[i] - 1;
380           s[0] = xb[i2];
381           for (j=0; j<nz; j++) {
382             xw[0] = x[vi[j]];
383             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
384           }
385           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
386           x[i2]  = xw[0];
387           idiag -= 1;
388           i2    -= 1;
389         }
390         break;
391       case 2:
392         s[0]  = xb[i2]; s[1] = xb[i2+1];
393         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
394         x[i2] = xw[0]; x[i2+1] = xw[1];
395         i2    -= 2;
396         idiag -= 4;
397         for (i=m-2; i>=0; i--) {
398           v  = aa + 4*(diag[i] + 1);
399           vi = aj + diag[i] + 1;
400           nz = ai[i+1] - diag[i] - 1;
401           s[0] = xb[i2]; s[1] = xb[i2+1];
402           for (j=0; j<nz; j++) {
403             idx = 2*vi[j];
404             it  = 4*j;
405             xw[0] = x[idx]; xw[1] = x[1+idx];
406             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
407           }
408           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
409           x[i2]   = xw[0]; x[i2+1] = xw[1];
410           idiag  -= 4;
411           i2     -= 2;
412         }
413         break;
414       case 3:
415         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
416         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
417         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
418         i2    -= 3;
419         idiag -= 9;
420         for (i=m-2; i>=0; i--) {
421           v  = aa + 9*(diag[i]+1);
422           vi = aj + diag[i] + 1;
423           nz = ai[i+1] - diag[i] - 1;
424           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425           while (nz--) {
426             idx = 3*(*vi++);
427             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
428             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
429             v  += 9;
430           }
431           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
432           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
433           idiag  -= 9;
434           i2     -= 3;
435         }
436         break;
437       case 4:
438         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
439         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
440         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
441         i2    -= 4;
442         idiag -= 16;
443         for (i=m-2; i>=0; i--) {
444           v  = aa + 16*(diag[i]+1);
445           vi = aj + diag[i] + 1;
446           nz = ai[i+1] - diag[i] - 1;
447           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448           while (nz--) {
449             idx = 4*(*vi++);
450             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
451             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
452             v  += 16;
453           }
454           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
455           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
456           idiag  -= 16;
457           i2     -= 4;
458         }
459         break;
460       case 5:
461         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
462         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
463         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
464         i2    -= 5;
465         idiag -= 25;
466         for (i=m-2; i>=0; i--) {
467           v  = aa + 25*(diag[i]+1);
468           vi = aj + diag[i] + 1;
469           nz = ai[i+1] - diag[i] - 1;
470           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471           while (nz--) {
472             idx = 5*(*vi++);
473             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
474             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
475             v  += 25;
476           }
477           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
478           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
479           idiag  -= 25;
480           i2     -= 5;
481         }
482         break;
483       case 6:
484         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
485         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
486         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
487         i2    -= 6;
488         idiag -= 36;
489         for (i=m-2; i>=0; i--) {
490           v  = aa + 36*(diag[i]+1);
491           vi = aj + diag[i] + 1;
492           nz = ai[i+1] - diag[i] - 1;
493           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494           while (nz--) {
495             idx = 6*(*vi++);
496             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
497             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
498             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
499             v  += 36;
500           }
501           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
502           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
503           idiag  -= 36;
504           i2     -= 6;
505         }
506         break;
507       case 7:
508         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
509         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
510         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
511         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
512         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
513         i2    -= 7;
514         idiag -= 49;
515         for (i=m-2; i>=0; i--) {
516           v  = aa + 49*(diag[i]+1);
517           vi = aj + diag[i] + 1;
518           nz = ai[i+1] - diag[i] - 1;
519           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
520           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
521           while (nz--) {
522             idx = 7*(*vi++);
523             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
524             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
525             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
526             v  += 49;
527           }
528           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
529           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
530           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
531           idiag  -= 49;
532           i2     -= 7;
533         }
534         break;
535       default:
536         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
537         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
538         i2    -= bs;
539         idiag -= bs2;
540         for (i=m-2; i>=0; i--) {
541           v  = aa + bs2*(diag[i]+1);
542           vi = aj + diag[i] + 1;
543           nz = ai[i+1] - diag[i] - 1;
544 
545           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
546           /* copy all rows of x that are needed into contiguous space */
547           workt = work;
548           for (j=0; j<nz; j++) {
549             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
550             workt += bs;
551           }
552           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
553           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
554 
555           idiag -= bs2;
556           i2    -= bs;
557         }
558         break;
559       }
560       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
561     }
562     its--;
563   }
564   while (its--) {
565     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
566       idiag = a->idiag;
567       i2 = 0;
568       switch (bs) {
569       case 1:
570         for (i=0; i<m; i++) {
571           v  = aa + ai[i];
572           vi = aj + ai[i];
573           nz = ai[i+1] - ai[i];
574           s[0] = b[i2];
575           for (j=0; j<nz; j++) {
576             xw[0] = x[vi[j]];
577             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
578           }
579           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
580           x[i2] += xw[0];
581           idiag += 1;
582           i2    += 1;
583         }
584         break;
585       case 2:
586         for (i=0; i<m; i++) {
587           v  = aa + 4*ai[i];
588           vi = aj + ai[i];
589           nz = ai[i+1] - ai[i];
590           s[0] = b[i2]; s[1] = b[i2+1];
591           for (j=0; j<nz; j++) {
592             idx = 2*vi[j];
593             it  = 4*j;
594             xw[0] = x[idx]; xw[1] = x[1+idx];
595             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
596           }
597           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
598           x[i2]  += xw[0]; x[i2+1] += xw[1];
599           idiag  += 4;
600           i2     += 2;
601         }
602         break;
603       case 3:
604         for (i=0; i<m; i++) {
605           v  = aa + 9*ai[i];
606           vi = aj + ai[i];
607           nz = ai[i+1] - ai[i];
608           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
609           while (nz--) {
610             idx = 3*(*vi++);
611             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
612             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
613             v  += 9;
614           }
615           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
616           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
617           idiag  += 9;
618           i2     += 3;
619         }
620         break;
621       case 4:
622         for (i=0; i<m; i++) {
623           v  = aa + 16*ai[i];
624           vi = aj + ai[i];
625           nz = ai[i+1] - ai[i];
626           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
627           while (nz--) {
628             idx = 4*(*vi++);
629             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
630             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
631             v  += 16;
632           }
633           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
634           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
635           idiag  += 16;
636           i2     += 4;
637         }
638         break;
639       case 5:
640         for (i=0; i<m; i++) {
641           v  = aa + 25*ai[i];
642           vi = aj + ai[i];
643           nz = ai[i+1] - ai[i];
644           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
645           while (nz--) {
646             idx = 5*(*vi++);
647             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
648             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
649             v  += 25;
650           }
651           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
652           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
653           idiag  += 25;
654           i2     += 5;
655         }
656         break;
657       case 6:
658         for (i=0; i<m; i++) {
659           v  = aa + 36*ai[i];
660           vi = aj + ai[i];
661           nz = ai[i+1] - ai[i];
662           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
663           while (nz--) {
664             idx = 6*(*vi++);
665             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
666             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
667             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
668             v  += 36;
669           }
670           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
671           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
672           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
673           idiag  += 36;
674           i2     += 6;
675         }
676         break;
677       case 7:
678         for (i=0; i<m; i++) {
679           v  = aa + 49*ai[i];
680           vi = aj + ai[i];
681           nz = ai[i+1] - ai[i];
682           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
683           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
684           while (nz--) {
685             idx = 7*(*vi++);
686             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
687             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
688             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
689             v  += 49;
690           }
691           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
692           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
693           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
694           idiag  += 49;
695           i2     += 7;
696         }
697         break;
698       default:
699         for (i=0; i<m; i++) {
700           v  = aa + bs2*ai[i];
701           vi = aj + ai[i];
702           nz = ai[i+1] - ai[i];
703 
704           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
705           /* copy all rows of x that are needed into contiguous space */
706           workt = work;
707           for (j=0; j<nz; j++) {
708             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
709             workt += bs;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
712           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
713 
714           idiag += bs2;
715           i2    += bs;
716         }
717         break;
718       }
719       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
720     }
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = a->idiag+bs2*(a->mbs-1);
723       i2 = bs * (m-1);
724       switch (bs) {
725       case 1:
726         for (i=m-1; i>=0; i--) {
727           v  = aa + ai[i];
728           vi = aj + ai[i];
729           nz = ai[i+1] - ai[i];
730           s[0] = b[i2];
731           for (j=0; j<nz; j++) {
732             xw[0] = x[vi[j]];
733             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
734           }
735           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
736           x[i2] += xw[0];
737           idiag -= 1;
738           i2    -= 1;
739         }
740         break;
741       case 2:
742         for (i=m-1; i>=0; i--) {
743           v  = aa + 4*ai[i];
744           vi = aj + ai[i];
745           nz = ai[i+1] - ai[i];
746           s[0] = b[i2]; s[1] = b[i2+1];
747           for (j=0; j<nz; j++) {
748             idx = 2*vi[j];
749             it  = 4*j;
750             xw[0] = x[idx]; xw[1] = x[1+idx];
751             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
752           }
753           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
754           x[i2]  += xw[0]; x[i2+1] += xw[1];
755           idiag  -= 4;
756           i2     -= 2;
757         }
758         break;
759       case 3:
760         for (i=m-1; i>=0; i--) {
761           v  = aa + 9*ai[i];
762           vi = aj + ai[i];
763           nz = ai[i+1] - ai[i];
764           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
765           while (nz--) {
766             idx = 3*(*vi++);
767             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
768             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
769             v  += 9;
770           }
771           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
772           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
773           idiag  -= 9;
774           i2     -= 3;
775         }
776         break;
777       case 4:
778         for (i=m-1; i>=0; i--) {
779           v  = aa + 16*ai[i];
780           vi = aj + ai[i];
781           nz = ai[i+1] - ai[i];
782           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
783           while (nz--) {
784             idx = 4*(*vi++);
785             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
786             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
787             v  += 16;
788           }
789           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
790           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
791           idiag  -= 16;
792           i2     -= 4;
793         }
794         break;
795       case 5:
796         for (i=m-1; i>=0; i--) {
797           v  = aa + 25*ai[i];
798           vi = aj + ai[i];
799           nz = ai[i+1] - ai[i];
800           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
801           while (nz--) {
802             idx = 5*(*vi++);
803             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
804             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
805             v  += 25;
806           }
807           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
808           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
809           idiag  -= 25;
810           i2     -= 5;
811         }
812         break;
813       case 6:
814         for (i=m-1; i>=0; i--) {
815           v  = aa + 36*ai[i];
816           vi = aj + ai[i];
817           nz = ai[i+1] - ai[i];
818           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
819           while (nz--) {
820             idx = 6*(*vi++);
821             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
822             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
823             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
824             v  += 36;
825           }
826           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
827           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
828           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
829           idiag  -= 36;
830           i2     -= 6;
831         }
832         break;
833       case 7:
834         for (i=m-1; i>=0; i--) {
835           v  = aa + 49*ai[i];
836           vi = aj + ai[i];
837           nz = ai[i+1] - ai[i];
838           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
839           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
840           while (nz--) {
841             idx = 7*(*vi++);
842             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
843             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
844             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
845             v  += 49;
846           }
847           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
848           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
849           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
850           idiag  -= 49;
851           i2     -= 7;
852         }
853         break;
854       default:
855         for (i=m-1; i>=0; i--) {
856           v  = aa + bs2*ai[i];
857           vi = aj + ai[i];
858           nz = ai[i+1] - ai[i];
859 
860           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
861           /* copy all rows of x that are needed into contiguous space */
862           workt = work;
863           for (j=0; j<nz; j++) {
864             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
865             workt += bs;
866           }
867           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
868           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
869 
870           idiag -= bs2;
871           i2    -= bs;
872         }
873         break;
874       }
875       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
876     }
877   }
878   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
879   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
880   PetscFunctionReturn(0);
881 }
882 
883 
884 /*
885     Special version for direct calls from Fortran (Used in PETSc-fun3d)
886 */
887 #if defined(PETSC_HAVE_FORTRAN_CAPS)
888 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
889 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
890 #define matsetvaluesblocked4_ matsetvaluesblocked4
891 #endif
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "matsetvaluesblocked4_"
895 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
896 {
897   Mat               A  = *AA;
898   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
899   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
900   PetscInt          *ai    =a->i,*ailen=a->ilen;
901   PetscInt          *aj    =a->j,stepval,lastcol = -1;
902   const PetscScalar *value = v;
903   MatScalar         *ap,*aa = a->a,*bap;
904 
905   PetscFunctionBegin;
906   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
907   stepval = (n-1)*4;
908   for (k=0; k<m; k++) { /* loop over added rows */
909     row  = im[k];
910     rp   = aj + ai[row];
911     ap   = aa + 16*ai[row];
912     nrow = ailen[row];
913     low  = 0;
914     high = nrow;
915     for (l=0; l<n; l++) { /* loop over added columns */
916       col = in[l];
917       if (col <= lastcol)  low = 0;
918       else                high = nrow;
919       lastcol = col;
920       value   = v + k*(stepval+4 + l)*4;
921       while (high-low > 7) {
922         t = (low+high)/2;
923         if (rp[t] > col) high = t;
924         else             low  = t;
925       }
926       for (i=low; i<high; i++) {
927         if (rp[i] > col) break;
928         if (rp[i] == col) {
929           bap = ap +  16*i;
930           for (ii=0; ii<4; ii++,value+=stepval) {
931             for (jj=ii; jj<16; jj+=4) {
932               bap[jj] += *value++;
933             }
934           }
935           goto noinsert2;
936         }
937       }
938       N = nrow++ - 1;
939       high++; /* added new column index thus must search to one higher than before */
940       /* shift up all the later entries in this row */
941       for (ii=N; ii>=i; ii--) {
942         rp[ii+1] = rp[ii];
943         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
944       }
945       if (N >= i) {
946         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
947       }
948       rp[i] = col;
949       bap   = ap +  16*i;
950       for (ii=0; ii<4; ii++,value+=stepval) {
951         for (jj=ii; jj<16; jj+=4) {
952           bap[jj] = *value++;
953         }
954       }
955       noinsert2:;
956       low = i;
957     }
958     ailen[row] = nrow;
959   }
960   PetscFunctionReturnVoid();
961 }
962 
963 #if defined(PETSC_HAVE_FORTRAN_CAPS)
964 #define matsetvalues4_ MATSETVALUES4
965 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
966 #define matsetvalues4_ matsetvalues4
967 #endif
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatSetValues4_"
971 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
972 {
973   Mat         A  = *AA;
974   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
975   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
976   PetscInt    *ai=a->i,*ailen=a->ilen;
977   PetscInt    *aj=a->j,brow,bcol;
978   PetscInt    ridx,cidx,lastcol = -1;
979   MatScalar   *ap,value,*aa=a->a,*bap;
980 
981   PetscFunctionBegin;
982   for (k=0; k<m; k++) { /* loop over added rows */
983     row  = im[k]; brow = row/4;
984     rp   = aj + ai[brow];
985     ap   = aa + 16*ai[brow];
986     nrow = ailen[brow];
987     low  = 0;
988     high = nrow;
989     for (l=0; l<n; l++) { /* loop over added columns */
990       col   = in[l]; bcol = col/4;
991       ridx  = row % 4; cidx = col % 4;
992       value = v[l + k*n];
993       if (col <= lastcol)  low = 0;
994       else                high = nrow;
995       lastcol = col;
996       while (high-low > 7) {
997         t = (low+high)/2;
998         if (rp[t] > bcol) high = t;
999         else              low  = t;
1000       }
1001       for (i=low; i<high; i++) {
1002         if (rp[i] > bcol) break;
1003         if (rp[i] == bcol) {
1004           bap   = ap +  16*i + 4*cidx + ridx;
1005           *bap += value;
1006           goto noinsert1;
1007         }
1008       }
1009       N = nrow++ - 1;
1010       high++; /* added new column thus must search to one higher than before */
1011       /* shift up all the later entries in this row */
1012       for (ii=N; ii>=i; ii--) {
1013         rp[ii+1] = rp[ii];
1014         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1015       }
1016       if (N>=i) {
1017         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1018       }
1019       rp[i]                    = bcol;
1020       ap[16*i + 4*cidx + ridx] = value;
1021 noinsert1:;
1022       low = i;
1023     }
1024     ailen[brow] = nrow;
1025   }
1026   PetscFunctionReturnVoid();
1027 }
1028 
1029 /*
1030      Checks for missing diagonals
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatMissingDiagonal_SeqBAIJ"
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*jj = a->j,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !jj) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     PetscInfo(A,"Matrix has no entries therefore is missing diagonal");
1047   } else {
1048     diag = a->diag;
1049     for (i=0; i<a->mbs; i++) {
1050       if (jj[diag[i]] != i) {
1051         *missing = PETSC_TRUE;
1052         if (d) *d = i;
1053         PetscInfo1(A,"Matrix is missing block diagonal number %D",i);
1054         break;
1055       }
1056     }
1057   }
1058   PetscFunctionReturn(0);
1059 }
1060 
1061 #undef __FUNCT__
1062 #define __FUNCT__ "MatMarkDiagonal_SeqBAIJ"
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetRowIJ_SeqBAIJ"
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = 0;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1((A->rmap->n/bs+1),ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatRestoreRowIJ_SeqBAIJ"
1175 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1176 {
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   if (!ia) PetscFunctionReturn(0);
1181   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1182     ierr = PetscFree(*ia);CHKERRQ(ierr);
1183     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1184   }
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatDestroy_SeqBAIJ"
1190 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1191 {
1192   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1193   PetscErrorCode ierr;
1194 
1195   PetscFunctionBegin;
1196 #if defined(PETSC_USE_LOG)
1197   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1198 #endif
1199   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1200   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1201   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1202   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1203   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1204   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1205   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1206   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1207   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1208   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1209   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1210   ierr = PetscFree(a->xtoy);CHKERRQ(ierr);
1211   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1212 
1213   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1214   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1215   ierr = PetscFree(A->data);CHKERRQ(ierr);
1216 
1217   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_SeqBAIJ"
1233 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1234 {
1235   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_ROW_ORIENTED:
1241     a->roworiented = flg;
1242     break;
1243   case MAT_KEEP_NONZERO_PATTERN:
1244     a->keepnonzeropattern = flg;
1245     break;
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247     a->nonew = (flg ? 0 : 1);
1248     break;
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     a->nonew = (flg ? -1 : 0);
1251     break;
1252   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1253     a->nonew = (flg ? -2 : 0);
1254     break;
1255   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1256     a->nounused = (flg ? -1 : 0);
1257     break;
1258   case MAT_NEW_DIAGONALS:
1259   case MAT_IGNORE_OFF_PROC_ENTRIES:
1260   case MAT_USE_HASH_TABLE:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatGetRow_SeqBAIJ"
1278 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1279 {
1280   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,*ai,*aj,bs,bn,bp,*idx_i,bs2;
1283   MatScalar      *aa,*aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   ai  = a->i;
1289   aj  = a->j;
1290   aa  = a->a;
1291   bs2 = a->bs2;
1292 
1293   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1294 
1295   bn  = row/bs;   /* Block number */
1296   bp  = row % bs; /* Block Position */
1297   M   = ai[bn+1] - ai[bn];
1298   *nz = bs*M;
1299 
1300   if (v) {
1301     *v = 0;
1302     if (*nz) {
1303       ierr = PetscMalloc1((*nz),v);CHKERRQ(ierr);
1304       for (i=0; i<M; i++) { /* for each block in the block row */
1305         v_i  = *v + i*bs;
1306         aa_i = aa + bs2*(ai[bn] + i);
1307         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1308       }
1309     }
1310   }
1311 
1312   if (idx) {
1313     *idx = 0;
1314     if (*nz) {
1315       ierr = PetscMalloc1((*nz),idx);CHKERRQ(ierr);
1316       for (i=0; i<M; i++) { /* for each block in the block row */
1317         idx_i = *idx + i*bs;
1318         itmp  = bs*aj[ai[bn] + i];
1319         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1320       }
1321     }
1322   }
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 #undef __FUNCT__
1327 #define __FUNCT__ "MatRestoreRow_SeqBAIJ"
1328 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1329 {
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1334   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 extern PetscErrorCode MatSetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatTranspose_SeqBAIJ"
1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1345   Mat            C;
1346   PetscErrorCode ierr;
1347   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1348   PetscInt       *rows,*cols,bs2=a->bs2;
1349   MatScalar      *array;
1350 
1351   PetscFunctionBegin;
1352   if (reuse == MAT_REUSE_MATRIX && A == *B && mbs != nbs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Square matrix only for in-place");
1353   if (reuse == MAT_INITIAL_MATRIX || A == *B) {
1354     ierr = PetscCalloc1((1+nbs),&col);CHKERRQ(ierr);
1355 
1356     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1357     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1358     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1359     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1360     ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(C,bs,0,col);CHKERRQ(ierr);
1361     ierr = PetscFree(col);CHKERRQ(ierr);
1362   } else {
1363     C = *B;
1364   }
1365 
1366   array = a->a;
1367   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1368   for (i=0; i<mbs; i++) {
1369     cols[0] = i*bs;
1370     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1371     len = ai[i+1] - ai[i];
1372     for (j=0; j<len; j++) {
1373       rows[0] = (*aj++)*bs;
1374       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1375       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1376       array += bs2;
1377     }
1378   }
1379   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1380 
1381   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1382   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1383 
1384   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1385     *B = C;
1386   } else {
1387     ierr = MatHeaderMerge(A,C);CHKERRQ(ierr);
1388   }
1389   PetscFunctionReturn(0);
1390 }
1391 
1392 #undef __FUNCT__
1393 #define __FUNCT__ "MatIsTranspose_SeqBAIJ"
1394 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1395 {
1396   PetscErrorCode ierr;
1397   Mat            Btrans;
1398 
1399   PetscFunctionBegin;
1400   *f   = PETSC_FALSE;
1401   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1402   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1403   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatView_SeqBAIJ_Binary"
1409 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1410 {
1411   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1412   PetscErrorCode ierr;
1413   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1414   int            fd;
1415   PetscScalar    *aa;
1416   FILE           *file;
1417 
1418   PetscFunctionBegin;
1419   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1420   ierr        = PetscMalloc1((4+A->rmap->N),&col_lens);CHKERRQ(ierr);
1421   col_lens[0] = MAT_FILE_CLASSID;
1422 
1423   col_lens[1] = A->rmap->N;
1424   col_lens[2] = A->cmap->n;
1425   col_lens[3] = a->nz*bs2;
1426 
1427   /* store lengths of each row and write (including header) to file */
1428   count = 0;
1429   for (i=0; i<a->mbs; i++) {
1430     for (j=0; j<bs; j++) {
1431       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1432     }
1433   }
1434   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1435   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1436 
1437   /* store column indices (zero start index) */
1438   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1439   count = 0;
1440   for (i=0; i<a->mbs; i++) {
1441     for (j=0; j<bs; j++) {
1442       for (k=a->i[i]; k<a->i[i+1]; k++) {
1443         for (l=0; l<bs; l++) {
1444           jj[count++] = bs*a->j[k] + l;
1445         }
1446       }
1447     }
1448   }
1449   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1450   ierr = PetscFree(jj);CHKERRQ(ierr);
1451 
1452   /* store nonzero values */
1453   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1454   count = 0;
1455   for (i=0; i<a->mbs; i++) {
1456     for (j=0; j<bs; j++) {
1457       for (k=a->i[i]; k<a->i[i+1]; k++) {
1458         for (l=0; l<bs; l++) {
1459           aa[count++] = a->a[bs2*k + l*bs + j];
1460         }
1461       }
1462     }
1463   }
1464   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1465   ierr = PetscFree(aa);CHKERRQ(ierr);
1466 
1467   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1468   if (file) {
1469     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1470   }
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatView_SeqBAIJ_ASCII"
1476 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1477 {
1478   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1479   PetscErrorCode    ierr;
1480   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1481   PetscViewerFormat format;
1482 
1483   PetscFunctionBegin;
1484   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1485   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1486     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1487   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1488     Mat aij;
1489     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1490     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1491     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1492   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1493       PetscFunctionReturn(0);
1494   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1495     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1496     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr);
1497     for (i=0; i<a->mbs; i++) {
1498       for (j=0; j<bs; j++) {
1499         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1500         for (k=a->i[i]; k<a->i[i+1]; k++) {
1501           for (l=0; l<bs; l++) {
1502 #if defined(PETSC_USE_COMPLEX)
1503             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1504               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1505                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1506             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1507               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1508                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1509             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1510               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1511             }
1512 #else
1513             if (a->a[bs2*k + l*bs + j] != 0.0) {
1514               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1515             }
1516 #endif
1517           }
1518         }
1519         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1520       }
1521     }
1522     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1523   } else {
1524     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1525     ierr = PetscObjectPrintClassNamePrefixType((PetscObject)A,viewer);CHKERRQ(ierr);
1526     for (i=0; i<a->mbs; i++) {
1527       for (j=0; j<bs; j++) {
1528         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1529         for (k=a->i[i]; k<a->i[i+1]; k++) {
1530           for (l=0; l<bs; l++) {
1531 #if defined(PETSC_USE_COMPLEX)
1532             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1534                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             }
1541 #else
1542             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1543 #endif
1544           }
1545         }
1546         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1547       }
1548     }
1549     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1550   }
1551   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1552   PetscFunctionReturn(0);
1553 }
1554 
1555 #include <petscdraw.h>
1556 #undef __FUNCT__
1557 #define __FUNCT__ "MatView_SeqBAIJ_Draw_Zoom"
1558 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1559 {
1560   Mat               A = (Mat) Aa;
1561   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1562   PetscErrorCode    ierr;
1563   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1564   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1565   MatScalar         *aa;
1566   PetscViewer       viewer;
1567   PetscViewerFormat format;
1568 
1569   PetscFunctionBegin;
1570   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1571   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1572 
1573   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1574 
1575   /* loop over matrix elements drawing boxes */
1576 
1577   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1578     color = PETSC_DRAW_BLUE;
1579     for (i=0,row=0; i<mbs; i++,row+=bs) {
1580       for (j=a->i[i]; j<a->i[i+1]; j++) {
1581         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1582         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1583         aa  = a->a + j*bs2;
1584         for (k=0; k<bs; k++) {
1585           for (l=0; l<bs; l++) {
1586             if (PetscRealPart(*aa++) >=  0.) continue;
1587             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1588           }
1589         }
1590       }
1591     }
1592     color = PETSC_DRAW_CYAN;
1593     for (i=0,row=0; i<mbs; i++,row+=bs) {
1594       for (j=a->i[i]; j<a->i[i+1]; j++) {
1595         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1596         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1597         aa  = a->a + j*bs2;
1598         for (k=0; k<bs; k++) {
1599           for (l=0; l<bs; l++) {
1600             if (PetscRealPart(*aa++) != 0.) continue;
1601             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1602           }
1603         }
1604       }
1605     }
1606     color = PETSC_DRAW_RED;
1607     for (i=0,row=0; i<mbs; i++,row+=bs) {
1608       for (j=a->i[i]; j<a->i[i+1]; j++) {
1609         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1610         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1611         aa  = a->a + j*bs2;
1612         for (k=0; k<bs; k++) {
1613           for (l=0; l<bs; l++) {
1614             if (PetscRealPart(*aa++) <= 0.) continue;
1615             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1616           }
1617         }
1618       }
1619     }
1620   } else {
1621     /* use contour shading to indicate magnitude of values */
1622     /* first determine max of all nonzero values */
1623     PetscDraw popup;
1624     PetscReal scale,maxv = 0.0;
1625 
1626     for (i=0; i<a->nz*a->bs2; i++) {
1627       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1628     }
1629     scale = (245.0 - PETSC_DRAW_BASIC_COLORS)/maxv;
1630     ierr  = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1631     if (popup) {
1632       ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1633     }
1634     for (i=0,row=0; i<mbs; i++,row+=bs) {
1635       for (j=a->i[i]; j<a->i[i+1]; j++) {
1636         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1637         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1638         aa  = a->a + j*bs2;
1639         for (k=0; k<bs; k++) {
1640           for (l=0; l<bs; l++) {
1641             color = PETSC_DRAW_BASIC_COLORS + (PetscInt)(scale*PetscAbsScalar(*aa++));
1642             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1643           }
1644         }
1645       }
1646     }
1647   }
1648   PetscFunctionReturn(0);
1649 }
1650 
1651 #undef __FUNCT__
1652 #define __FUNCT__ "MatView_SeqBAIJ_Draw"
1653 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1654 {
1655   PetscErrorCode ierr;
1656   PetscReal      xl,yl,xr,yr,w,h;
1657   PetscDraw      draw;
1658   PetscBool      isnull;
1659 
1660   PetscFunctionBegin;
1661   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1662   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
1663 
1664   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1665   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1666   xr  += w;    yr += h;  xl = -w;     yl = -h;
1667   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1668   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1669   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1670   PetscFunctionReturn(0);
1671 }
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatView_SeqBAIJ"
1675 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1676 {
1677   PetscErrorCode ierr;
1678   PetscBool      iascii,isbinary,isdraw;
1679 
1680   PetscFunctionBegin;
1681   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1682   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1683   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1684   if (iascii) {
1685     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1686   } else if (isbinary) {
1687     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1688   } else if (isdraw) {
1689     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1690   } else {
1691     Mat B;
1692     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1693     ierr = MatView(B,viewer);CHKERRQ(ierr);
1694     ierr = MatDestroy(&B);CHKERRQ(ierr);
1695   }
1696   PetscFunctionReturn(0);
1697 }
1698 
1699 
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatGetValues_SeqBAIJ"
1702 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1703 {
1704   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1705   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1706   PetscInt    *ai = a->i,*ailen = a->ilen;
1707   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1708   MatScalar   *ap,*aa = a->a;
1709 
1710   PetscFunctionBegin;
1711   for (k=0; k<m; k++) { /* loop over rows */
1712     row = im[k]; brow = row/bs;
1713     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1714     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1715     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1716     nrow = ailen[brow];
1717     for (l=0; l<n; l++) { /* loop over columns */
1718       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1719       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1720       col  = in[l];
1721       bcol = col/bs;
1722       cidx = col%bs;
1723       ridx = row%bs;
1724       high = nrow;
1725       low  = 0; /* assume unsorted */
1726       while (high-low > 5) {
1727         t = (low+high)/2;
1728         if (rp[t] > bcol) high = t;
1729         else             low  = t;
1730       }
1731       for (i=low; i<high; i++) {
1732         if (rp[i] > bcol) break;
1733         if (rp[i] == bcol) {
1734           *v++ = ap[bs2*i+bs*cidx+ridx];
1735           goto finished;
1736         }
1737       }
1738       *v++ = 0.0;
1739 finished:;
1740     }
1741   }
1742   PetscFunctionReturn(0);
1743 }
1744 
1745 #undef __FUNCT__
1746 #define __FUNCT__ "MatSetValuesBlocked_SeqBAIJ"
1747 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1748 {
1749   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1750   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1751   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1752   PetscErrorCode    ierr;
1753   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1754   PetscBool         roworiented=a->roworiented;
1755   const PetscScalar *value     = v;
1756   MatScalar         *ap,*aa = a->a,*bap;
1757 
1758   PetscFunctionBegin;
1759   if (roworiented) {
1760     stepval = (n-1)*bs;
1761   } else {
1762     stepval = (m-1)*bs;
1763   }
1764   for (k=0; k<m; k++) { /* loop over added rows */
1765     row = im[k];
1766     if (row < 0) continue;
1767 #if defined(PETSC_USE_DEBUG)
1768     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,a->mbs-1);
1769 #endif
1770     rp   = aj + ai[row];
1771     ap   = aa + bs2*ai[row];
1772     rmax = imax[row];
1773     nrow = ailen[row];
1774     low  = 0;
1775     high = nrow;
1776     for (l=0; l<n; l++) { /* loop over added columns */
1777       if (in[l] < 0) continue;
1778 #if defined(PETSC_USE_DEBUG)
1779       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],a->nbs-1);
1780 #endif
1781       col = in[l];
1782       if (roworiented) {
1783         value = v + (k*(stepval+bs) + l)*bs;
1784       } else {
1785         value = v + (l*(stepval+bs) + k)*bs;
1786       }
1787       if (col <= lastcol) low = 0;
1788       else high = nrow;
1789       lastcol = col;
1790       while (high-low > 7) {
1791         t = (low+high)/2;
1792         if (rp[t] > col) high = t;
1793         else             low  = t;
1794       }
1795       for (i=low; i<high; i++) {
1796         if (rp[i] > col) break;
1797         if (rp[i] == col) {
1798           bap = ap +  bs2*i;
1799           if (roworiented) {
1800             if (is == ADD_VALUES) {
1801               for (ii=0; ii<bs; ii++,value+=stepval) {
1802                 for (jj=ii; jj<bs2; jj+=bs) {
1803                   bap[jj] += *value++;
1804                 }
1805               }
1806             } else {
1807               for (ii=0; ii<bs; ii++,value+=stepval) {
1808                 for (jj=ii; jj<bs2; jj+=bs) {
1809                   bap[jj] = *value++;
1810                 }
1811               }
1812             }
1813           } else {
1814             if (is == ADD_VALUES) {
1815               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1816                 for (jj=0; jj<bs; jj++) {
1817                   bap[jj] += value[jj];
1818                 }
1819                 bap += bs;
1820               }
1821             } else {
1822               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1823                 for (jj=0; jj<bs; jj++) {
1824                   bap[jj]  = value[jj];
1825                 }
1826                 bap += bs;
1827               }
1828             }
1829           }
1830           goto noinsert2;
1831         }
1832       }
1833       if (nonew == 1) goto noinsert2;
1834       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
1835       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1836       N = nrow++ - 1; high++;
1837       /* shift up all the later entries in this row */
1838       for (ii=N; ii>=i; ii--) {
1839         rp[ii+1] = rp[ii];
1840         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1841       }
1842       if (N >= i) {
1843         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1844       }
1845       rp[i] = col;
1846       bap   = ap +  bs2*i;
1847       if (roworiented) {
1848         for (ii=0; ii<bs; ii++,value+=stepval) {
1849           for (jj=ii; jj<bs2; jj+=bs) {
1850             bap[jj] = *value++;
1851           }
1852         }
1853       } else {
1854         for (ii=0; ii<bs; ii++,value+=stepval) {
1855           for (jj=0; jj<bs; jj++) {
1856             *bap++ = *value++;
1857           }
1858         }
1859       }
1860 noinsert2:;
1861       low = i;
1862     }
1863     ailen[row] = nrow;
1864   }
1865   PetscFunctionReturn(0);
1866 }
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatAssemblyEnd_SeqBAIJ"
1870 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1871 {
1872   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1873   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1874   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1875   PetscErrorCode ierr;
1876   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1877   MatScalar      *aa  = a->a,*ap;
1878   PetscReal      ratio=0.6;
1879 
1880   PetscFunctionBegin;
1881   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1882 
1883   if (m) rmax = ailen[0];
1884   for (i=1; i<mbs; i++) {
1885     /* move each row back by the amount of empty slots (fshift) before it*/
1886     fshift += imax[i-1] - ailen[i-1];
1887     rmax    = PetscMax(rmax,ailen[i]);
1888     if (fshift) {
1889       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1890       N  = ailen[i];
1891       for (j=0; j<N; j++) {
1892         ip[j-fshift] = ip[j];
1893 
1894         ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1895       }
1896     }
1897     ai[i] = ai[i-1] + ailen[i-1];
1898   }
1899   if (mbs) {
1900     fshift += imax[mbs-1] - ailen[mbs-1];
1901     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1902   }
1903 
1904   /* reset ilen and imax for each row */
1905   a->nonzerorowcnt = 0;
1906   for (i=0; i<mbs; i++) {
1907     ailen[i] = imax[i] = ai[i+1] - ai[i];
1908     a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1909   }
1910   a->nz = ai[mbs];
1911 
1912   /* diagonals may have moved, so kill the diagonal pointers */
1913   a->idiagvalid = PETSC_FALSE;
1914   if (fshift && a->diag) {
1915     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1916     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1917     a->diag = 0;
1918   }
1919   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1920   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1921   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1922   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1923 
1924   A->info.mallocs    += a->reallocs;
1925   a->reallocs         = 0;
1926   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1927 
1928   ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1929 
1930   A->same_nonzero = PETSC_TRUE;
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 /*
1935    This function returns an array of flags which indicate the locations of contiguous
1936    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1937    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1938    Assume: sizes should be long enough to hold all the values.
1939 */
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatZeroRows_SeqBAIJ_Check_Blocks"
1942 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1943 {
1944   PetscInt  i,j,k,row;
1945   PetscBool flg;
1946 
1947   PetscFunctionBegin;
1948   for (i=0,j=0; i<n; j++) {
1949     row = idx[i];
1950     if (row%bs!=0) { /* Not the begining of a block */
1951       sizes[j] = 1;
1952       i++;
1953     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1954       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1955       i++;
1956     } else { /* Begining of the block, so check if the complete block exists */
1957       flg = PETSC_TRUE;
1958       for (k=1; k<bs; k++) {
1959         if (row+k != idx[i+k]) { /* break in the block */
1960           flg = PETSC_FALSE;
1961           break;
1962         }
1963       }
1964       if (flg) { /* No break in the bs */
1965         sizes[j] = bs;
1966         i       += bs;
1967       } else {
1968         sizes[j] = 1;
1969         i++;
1970       }
1971     }
1972   }
1973   *bs_max = j;
1974   PetscFunctionReturn(0);
1975 }
1976 
1977 #undef __FUNCT__
1978 #define __FUNCT__ "MatZeroRows_SeqBAIJ"
1979 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1980 {
1981   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1982   PetscErrorCode    ierr;
1983   PetscInt          i,j,k,count,*rows;
1984   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
1985   PetscScalar       zero = 0.0;
1986   MatScalar         *aa;
1987   const PetscScalar *xx;
1988   PetscScalar       *bb;
1989 
1990   PetscFunctionBegin;
1991   /* fix right hand side if needed */
1992   if (x && b) {
1993     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
1994     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
1995     for (i=0; i<is_n; i++) {
1996       bb[is_idx[i]] = diag*xx[is_idx[i]];
1997     }
1998     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
1999     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2000   }
2001 
2002   /* Make a copy of the IS and  sort it */
2003   /* allocate memory for rows,sizes */
2004   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2005 
2006   /* copy IS values to rows, and sort them */
2007   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2008   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2009 
2010   if (baij->keepnonzeropattern) {
2011     for (i=0; i<is_n; i++) sizes[i] = 1;
2012     bs_max          = is_n;
2013     A->same_nonzero = PETSC_TRUE;
2014   } else {
2015     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2016 
2017     A->same_nonzero = PETSC_FALSE;
2018   }
2019 
2020   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2021     row = rows[j];
2022     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2023     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2024     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2025     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2026       if (diag != (PetscScalar)0.0) {
2027         if (baij->ilen[row/bs] > 0) {
2028           baij->ilen[row/bs]       = 1;
2029           baij->j[baij->i[row/bs]] = row/bs;
2030 
2031           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2032         }
2033         /* Now insert all the diagonal values for this bs */
2034         for (k=0; k<bs; k++) {
2035           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2036         }
2037       } else { /* (diag == 0.0) */
2038         baij->ilen[row/bs] = 0;
2039       } /* end (diag == 0.0) */
2040     } else { /* (sizes[i] != bs) */
2041 #if defined(PETSC_USE_DEBUG)
2042       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2043 #endif
2044       for (k=0; k<count; k++) {
2045         aa[0] =  zero;
2046         aa   += bs;
2047       }
2048       if (diag != (PetscScalar)0.0) {
2049         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2050       }
2051     }
2052   }
2053 
2054   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2055   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2056   PetscFunctionReturn(0);
2057 }
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatZeroRowsColumns_SeqBAIJ"
2061 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2062 {
2063   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2064   PetscErrorCode    ierr;
2065   PetscInt          i,j,k,count;
2066   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2067   PetscScalar       zero = 0.0;
2068   MatScalar         *aa;
2069   const PetscScalar *xx;
2070   PetscScalar       *bb;
2071   PetscBool         *zeroed,vecs = PETSC_FALSE;
2072 
2073   PetscFunctionBegin;
2074   /* fix right hand side if needed */
2075   if (x && b) {
2076     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2077     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2078     vecs = PETSC_TRUE;
2079   }
2080   A->same_nonzero = PETSC_TRUE;
2081 
2082   /* zero the columns */
2083   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2084   for (i=0; i<is_n; i++) {
2085     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2086     zeroed[is_idx[i]] = PETSC_TRUE;
2087   }
2088   for (i=0; i<A->rmap->N; i++) {
2089     if (!zeroed[i]) {
2090       row = i/bs;
2091       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2092         for (k=0; k<bs; k++) {
2093           col = bs*baij->j[j] + k;
2094           if (zeroed[col]) {
2095             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2096             if (vecs) bb[i] -= aa[0]*xx[col];
2097             aa[0] = 0.0;
2098           }
2099         }
2100       }
2101     } else if (vecs) bb[i] = diag*xx[i];
2102   }
2103   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2104   if (vecs) {
2105     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2106     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2107   }
2108 
2109   /* zero the rows */
2110   for (i=0; i<is_n; i++) {
2111     row   = is_idx[i];
2112     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2113     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2114     for (k=0; k<count; k++) {
2115       aa[0] =  zero;
2116       aa   += bs;
2117     }
2118     if (diag != (PetscScalar)0.0) {
2119       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2120     }
2121   }
2122   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2123   PetscFunctionReturn(0);
2124 }
2125 
2126 #undef __FUNCT__
2127 #define __FUNCT__ "MatSetValues_SeqBAIJ"
2128 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2129 {
2130   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2131   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2132   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2133   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2134   PetscErrorCode ierr;
2135   PetscInt       ridx,cidx,bs2=a->bs2;
2136   PetscBool      roworiented=a->roworiented;
2137   MatScalar      *ap,value,*aa=a->a,*bap;
2138 
2139   PetscFunctionBegin;
2140   if (v) PetscValidScalarPointer(v,6);
2141   for (k=0; k<m; k++) { /* loop over added rows */
2142     row  = im[k];
2143     brow = row/bs;
2144     if (row < 0) continue;
2145 #if defined(PETSC_USE_DEBUG)
2146     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2147 #endif
2148     rp   = aj + ai[brow];
2149     ap   = aa + bs2*ai[brow];
2150     rmax = imax[brow];
2151     nrow = ailen[brow];
2152     low  = 0;
2153     high = nrow;
2154     for (l=0; l<n; l++) { /* loop over added columns */
2155       if (in[l] < 0) continue;
2156 #if defined(PETSC_USE_DEBUG)
2157       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2158 #endif
2159       col  = in[l]; bcol = col/bs;
2160       ridx = row % bs; cidx = col % bs;
2161       if (roworiented) {
2162         value = v[l + k*n];
2163       } else {
2164         value = v[k + l*m];
2165       }
2166       if (col <= lastcol) low = 0; else high = nrow;
2167       lastcol = col;
2168       while (high-low > 7) {
2169         t = (low+high)/2;
2170         if (rp[t] > bcol) high = t;
2171         else              low  = t;
2172       }
2173       for (i=low; i<high; i++) {
2174         if (rp[i] > bcol) break;
2175         if (rp[i] == bcol) {
2176           bap = ap +  bs2*i + bs*cidx + ridx;
2177           if (is == ADD_VALUES) *bap += value;
2178           else                  *bap  = value;
2179           goto noinsert1;
2180         }
2181       }
2182       if (nonew == 1) goto noinsert1;
2183       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2184       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2185       N = nrow++ - 1; high++;
2186       /* shift up all the later entries in this row */
2187       for (ii=N; ii>=i; ii--) {
2188         rp[ii+1] = rp[ii];
2189         ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2190       }
2191       if (N>=i) {
2192         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2193       }
2194       rp[i]                      = bcol;
2195       ap[bs2*i + bs*cidx + ridx] = value;
2196       a->nz++;
2197 noinsert1:;
2198       low = i;
2199     }
2200     ailen[brow] = nrow;
2201   }
2202   A->same_nonzero = PETSC_FALSE;
2203   PetscFunctionReturn(0);
2204 }
2205 
2206 #undef __FUNCT__
2207 #define __FUNCT__ "MatILUFactor_SeqBAIJ"
2208 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2209 {
2210   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2211   Mat            outA;
2212   PetscErrorCode ierr;
2213   PetscBool      row_identity,col_identity;
2214 
2215   PetscFunctionBegin;
2216   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2217   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2218   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2219   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2220 
2221   outA            = inA;
2222   inA->factortype = MAT_FACTOR_LU;
2223 
2224   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2225 
2226   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2227   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2228   a->row = row;
2229   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2230   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2231   a->col = col;
2232 
2233   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2234   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2235   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2236   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2237 
2238   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2239   if (!a->solve_work) {
2240     ierr = PetscMalloc1((inA->rmap->N+inA->rmap->bs),&a->solve_work);CHKERRQ(ierr);
2241     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2242   }
2243   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2244   PetscFunctionReturn(0);
2245 }
2246 
2247 #undef __FUNCT__
2248 #define __FUNCT__ "MatSeqBAIJSetColumnIndices_SeqBAIJ"
2249 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2250 {
2251   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2252   PetscInt    i,nz,mbs;
2253 
2254   PetscFunctionBegin;
2255   nz  = baij->maxnz;
2256   mbs = baij->mbs;
2257   for (i=0; i<nz; i++) {
2258     baij->j[i] = indices[i];
2259   }
2260   baij->nz = nz;
2261   for (i=0; i<mbs; i++) {
2262     baij->ilen[i] = baij->imax[i];
2263   }
2264   PetscFunctionReturn(0);
2265 }
2266 
2267 #undef __FUNCT__
2268 #define __FUNCT__ "MatSeqBAIJSetColumnIndices"
2269 /*@
2270     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2271        in the matrix.
2272 
2273   Input Parameters:
2274 +  mat - the SeqBAIJ matrix
2275 -  indices - the column indices
2276 
2277   Level: advanced
2278 
2279   Notes:
2280     This can be called if you have precomputed the nonzero structure of the
2281   matrix and want to provide it to the matrix object to improve the performance
2282   of the MatSetValues() operation.
2283 
2284     You MUST have set the correct numbers of nonzeros per row in the call to
2285   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2286 
2287     MUST be called before any calls to MatSetValues();
2288 
2289 @*/
2290 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2291 {
2292   PetscErrorCode ierr;
2293 
2294   PetscFunctionBegin;
2295   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2296   PetscValidPointer(indices,2);
2297   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2298   PetscFunctionReturn(0);
2299 }
2300 
2301 #undef __FUNCT__
2302 #define __FUNCT__ "MatGetRowMaxAbs_SeqBAIJ"
2303 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2304 {
2305   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2306   PetscErrorCode ierr;
2307   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2308   PetscReal      atmp;
2309   PetscScalar    *x,zero = 0.0;
2310   MatScalar      *aa;
2311   PetscInt       ncols,brow,krow,kcol;
2312 
2313   PetscFunctionBegin;
2314   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2315   bs  = A->rmap->bs;
2316   aa  = a->a;
2317   ai  = a->i;
2318   aj  = a->j;
2319   mbs = a->mbs;
2320 
2321   ierr = VecSet(v,zero);CHKERRQ(ierr);
2322   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2323   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2324   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2325   for (i=0; i<mbs; i++) {
2326     ncols = ai[1] - ai[0]; ai++;
2327     brow  = bs*i;
2328     for (j=0; j<ncols; j++) {
2329       for (kcol=0; kcol<bs; kcol++) {
2330         for (krow=0; krow<bs; krow++) {
2331           atmp = PetscAbsScalar(*aa);aa++;
2332           row  = brow + krow;   /* row index */
2333           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2334         }
2335       }
2336       aj++;
2337     }
2338   }
2339   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatCopy_SeqBAIJ"
2345 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2346 {
2347   PetscErrorCode ierr;
2348 
2349   PetscFunctionBegin;
2350   /* If the two matrices have the same copy implementation, use fast copy. */
2351   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2352     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2353     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2354     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2355 
2356     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2357     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2358     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2359   } else {
2360     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2361   }
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatSetUp_SeqBAIJ"
2367 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2368 {
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr =  MatSeqBAIJSetPreallocation_SeqBAIJ(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 #undef __FUNCT__
2377 #define __FUNCT__ "MatSeqBAIJGetArray_SeqBAIJ"
2378 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2379 {
2380   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2381 
2382   PetscFunctionBegin;
2383   *array = a->a;
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatSeqBAIJRestoreArray_SeqBAIJ"
2389 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2390 {
2391   PetscFunctionBegin;
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 #undef __FUNCT__
2396 #define __FUNCT__ "MatAXPY_SeqBAIJ"
2397 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2398 {
2399   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2400   PetscErrorCode ierr;
2401   PetscInt       i,bs=Y->rmap->bs,j,bs2=bs*bs;
2402   PetscBLASInt   one=1;
2403 
2404   PetscFunctionBegin;
2405   if (str == SAME_NONZERO_PATTERN) {
2406     PetscScalar  alpha = a;
2407     PetscBLASInt bnz;
2408     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2409     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2410   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2411     if (y->xtoy && y->XtoY != X) {
2412       ierr = PetscFree(y->xtoy);CHKERRQ(ierr);
2413       ierr = MatDestroy(&y->XtoY);CHKERRQ(ierr);
2414     }
2415     if (!y->xtoy) { /* get xtoy */
2416       ierr    = MatAXPYGetxtoy_Private(x->mbs,x->i,x->j,NULL, y->i,y->j,NULL, &y->xtoy);CHKERRQ(ierr);
2417       y->XtoY = X;
2418       ierr    = PetscObjectReference((PetscObject)X);CHKERRQ(ierr);
2419     }
2420     for (i=0; i<x->nz; i++) {
2421       j = 0;
2422       while (j < bs2) {
2423         y->a[bs2*y->xtoy[i]+j] += a*(x->a[bs2*i+j]);
2424         j++;
2425       }
2426     }
2427     ierr = PetscInfo3(Y,"ratio of nnz(X)/nnz(Y): %D/%D = %g\n",bs2*x->nz,bs2*y->nz,(double)((PetscReal)(bs2*x->nz)/(bs2*y->nz)));CHKERRQ(ierr);
2428   } else {
2429     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2430   }
2431   PetscFunctionReturn(0);
2432 }
2433 
2434 #undef __FUNCT__
2435 #define __FUNCT__ "MatRealPart_SeqBAIJ"
2436 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2437 {
2438   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2439   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2440   MatScalar   *aa = a->a;
2441 
2442   PetscFunctionBegin;
2443   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatImaginaryPart_SeqBAIJ"
2449 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2450 {
2451   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2452   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2453   MatScalar   *aa = a->a;
2454 
2455   PetscFunctionBegin;
2456   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2457   PetscFunctionReturn(0);
2458 }
2459 
2460 #undef __FUNCT__
2461 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ"
2462 /*
2463     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2464 */
2465 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2466 {
2467   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2468   PetscErrorCode ierr;
2469   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2470   PetscInt       nz = a->i[m],row,*jj,mr,col;
2471 
2472   PetscFunctionBegin;
2473   *nn = n;
2474   if (!ia) PetscFunctionReturn(0);
2475   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2476   else {
2477     ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2478     ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2479     ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2480     jj   = a->j;
2481     for (i=0; i<nz; i++) {
2482       collengths[jj[i]]++;
2483     }
2484     cia[0] = oshift;
2485     for (i=0; i<n; i++) {
2486       cia[i+1] = cia[i] + collengths[i];
2487     }
2488     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2489     jj   = a->j;
2490     for (row=0; row<m; row++) {
2491       mr = a->i[row+1] - a->i[row];
2492       for (i=0; i<mr; i++) {
2493         col = *jj++;
2494 
2495         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2496       }
2497     }
2498     ierr = PetscFree(collengths);CHKERRQ(ierr);
2499     *ia  = cia; *ja = cja;
2500   }
2501   PetscFunctionReturn(0);
2502 }
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ"
2506 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2507 {
2508   PetscErrorCode ierr;
2509 
2510   PetscFunctionBegin;
2511   if (!ia) PetscFunctionReturn(0);
2512   ierr = PetscFree(*ia);CHKERRQ(ierr);
2513   ierr = PetscFree(*ja);CHKERRQ(ierr);
2514   PetscFunctionReturn(0);
2515 }
2516 
2517 /*
2518  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2519  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2520  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2521  */
2522 #undef __FUNCT__
2523 #define __FUNCT__ "MatGetColumnIJ_SeqBAIJ_Color"
2524 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2525 {
2526   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2527   PetscErrorCode ierr;
2528   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2529   PetscInt       nz = a->i[m],row,*jj,mr,col;
2530   PetscInt       *cspidx;
2531 
2532   PetscFunctionBegin;
2533   *nn = n;
2534   if (!ia) PetscFunctionReturn(0);
2535 
2536   ierr = PetscCalloc1((n+1),&collengths);CHKERRQ(ierr);
2537   ierr = PetscMalloc1((n+1),&cia);CHKERRQ(ierr);
2538   ierr = PetscMalloc1((nz+1),&cja);CHKERRQ(ierr);
2539   ierr = PetscMalloc1((nz+1),&cspidx);CHKERRQ(ierr);
2540   jj   = a->j;
2541   for (i=0; i<nz; i++) {
2542     collengths[jj[i]]++;
2543   }
2544   cia[0] = oshift;
2545   for (i=0; i<n; i++) {
2546     cia[i+1] = cia[i] + collengths[i];
2547   }
2548   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2549   jj   = a->j;
2550   for (row=0; row<m; row++) {
2551     mr = a->i[row+1] - a->i[row];
2552     for (i=0; i<mr; i++) {
2553       col = *jj++;
2554       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2555       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2556     }
2557   }
2558   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2559   *ia    = cia; *ja = cja;
2560   *spidx = cspidx;
2561   PetscFunctionReturn(0);
2562 }
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatRestoreColumnIJ_SeqBAIJ_Color"
2566 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2567 {
2568   PetscErrorCode ierr;
2569 
2570   PetscFunctionBegin;
2571   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2572   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 /* -------------------------------------------------------------------*/
2577 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2578                                        MatGetRow_SeqBAIJ,
2579                                        MatRestoreRow_SeqBAIJ,
2580                                        MatMult_SeqBAIJ_N,
2581                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2582                                        MatMultTranspose_SeqBAIJ,
2583                                        MatMultTransposeAdd_SeqBAIJ,
2584                                        0,
2585                                        0,
2586                                        0,
2587                                /* 10*/ 0,
2588                                        MatLUFactor_SeqBAIJ,
2589                                        0,
2590                                        0,
2591                                        MatTranspose_SeqBAIJ,
2592                                /* 15*/ MatGetInfo_SeqBAIJ,
2593                                        MatEqual_SeqBAIJ,
2594                                        MatGetDiagonal_SeqBAIJ,
2595                                        MatDiagonalScale_SeqBAIJ,
2596                                        MatNorm_SeqBAIJ,
2597                                /* 20*/ 0,
2598                                        MatAssemblyEnd_SeqBAIJ,
2599                                        MatSetOption_SeqBAIJ,
2600                                        MatZeroEntries_SeqBAIJ,
2601                                /* 24*/ MatZeroRows_SeqBAIJ,
2602                                        0,
2603                                        0,
2604                                        0,
2605                                        0,
2606                                /* 29*/ MatSetUp_SeqBAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                /* 34*/ MatDuplicate_SeqBAIJ,
2612                                        0,
2613                                        0,
2614                                        MatILUFactor_SeqBAIJ,
2615                                        0,
2616                                /* 39*/ MatAXPY_SeqBAIJ,
2617                                        MatGetSubMatrices_SeqBAIJ,
2618                                        MatIncreaseOverlap_SeqBAIJ,
2619                                        MatGetValues_SeqBAIJ,
2620                                        MatCopy_SeqBAIJ,
2621                                /* 44*/ 0,
2622                                        MatScale_SeqBAIJ,
2623                                        0,
2624                                        0,
2625                                        MatZeroRowsColumns_SeqBAIJ,
2626                                /* 49*/ 0,
2627                                        MatGetRowIJ_SeqBAIJ,
2628                                        MatRestoreRowIJ_SeqBAIJ,
2629                                        MatGetColumnIJ_SeqBAIJ,
2630                                        MatRestoreColumnIJ_SeqBAIJ,
2631                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                        MatSetValuesBlocked_SeqBAIJ,
2636                                /* 59*/ MatGetSubMatrix_SeqBAIJ,
2637                                        MatDestroy_SeqBAIJ,
2638                                        MatView_SeqBAIJ,
2639                                        0,
2640                                        0,
2641                                /* 64*/ 0,
2642                                        0,
2643                                        0,
2644                                        0,
2645                                        0,
2646                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2647                                        0,
2648                                        MatConvert_Basic,
2649                                        0,
2650                                        0,
2651                                /* 74*/ 0,
2652                                        MatFDColoringApply_BAIJ,
2653                                        0,
2654                                        0,
2655                                        0,
2656                                /* 79*/ 0,
2657                                        0,
2658                                        0,
2659                                        0,
2660                                        MatLoad_SeqBAIJ,
2661                                /* 84*/ 0,
2662                                        0,
2663                                        0,
2664                                        0,
2665                                        0,
2666                                /* 89*/ 0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                /* 94*/ 0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                        0,
2676                                /* 99*/ 0,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                        0,
2681                                /*104*/ 0,
2682                                        MatRealPart_SeqBAIJ,
2683                                        MatImaginaryPart_SeqBAIJ,
2684                                        0,
2685                                        0,
2686                                /*109*/ 0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                        MatMissingDiagonal_SeqBAIJ,
2691                                /*114*/ 0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                /*119*/ 0,
2697                                        0,
2698                                        MatMultHermitianTranspose_SeqBAIJ,
2699                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2700                                        0,
2701                                /*124*/ 0,
2702                                        0,
2703                                        MatInvertBlockDiagonal_SeqBAIJ,
2704                                        0,
2705                                        0,
2706                                /*129*/ 0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                        0,
2711                                /*134*/ 0,
2712                                        0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                /*139*/ 0,
2717                                        0,
2718                                        0,
2719                                        MatFDColoringSetUp_SeqXAIJ
2720 };
2721 
2722 #undef __FUNCT__
2723 #define __FUNCT__ "MatStoreValues_SeqBAIJ"
2724 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2725 {
2726   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2727   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2728   PetscErrorCode ierr;
2729 
2730   PetscFunctionBegin;
2731   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2732 
2733   /* allocate space for values if not already there */
2734   if (!aij->saved_values) {
2735     ierr = PetscMalloc1((nz+1),&aij->saved_values);CHKERRQ(ierr);
2736     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2737   }
2738 
2739   /* copy values over */
2740   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2741   PetscFunctionReturn(0);
2742 }
2743 
2744 #undef __FUNCT__
2745 #define __FUNCT__ "MatRetrieveValues_SeqBAIJ"
2746 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2747 {
2748   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2749   PetscErrorCode ierr;
2750   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2751 
2752   PetscFunctionBegin;
2753   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2754   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2755 
2756   /* copy values over */
2757   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2758   PetscFunctionReturn(0);
2759 }
2760 
2761 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2762 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2763 
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatSeqBAIJSetPreallocation_SeqBAIJ"
2766 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2767 {
2768   Mat_SeqBAIJ    *b;
2769   PetscErrorCode ierr;
2770   PetscInt       i,mbs,nbs,bs2;
2771   PetscBool      flg,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2772 
2773   PetscFunctionBegin;
2774   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2775   if (nz == MAT_SKIP_ALLOCATION) {
2776     skipallocation = PETSC_TRUE;
2777     nz             = 0;
2778   }
2779 
2780   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2781   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2782   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2783   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2784   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2785 
2786   B->preallocated = PETSC_TRUE;
2787 
2788   mbs = B->rmap->n/bs;
2789   nbs = B->cmap->n/bs;
2790   bs2 = bs*bs;
2791 
2792   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2793 
2794   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2795   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2796   if (nnz) {
2797     for (i=0; i<mbs; i++) {
2798       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2799       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2800     }
2801   }
2802 
2803   b    = (Mat_SeqBAIJ*)B->data;
2804   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2805   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
2806   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2807 
2808   if (!flg) {
2809     switch (bs) {
2810     case 1:
2811       B->ops->mult    = MatMult_SeqBAIJ_1;
2812       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2813       break;
2814     case 2:
2815       B->ops->mult    = MatMult_SeqBAIJ_2;
2816       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2817       break;
2818     case 3:
2819       B->ops->mult    = MatMult_SeqBAIJ_3;
2820       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2821       break;
2822     case 4:
2823       B->ops->mult    = MatMult_SeqBAIJ_4;
2824       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2825       break;
2826     case 5:
2827       B->ops->mult    = MatMult_SeqBAIJ_5;
2828       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2829       break;
2830     case 6:
2831       B->ops->mult    = MatMult_SeqBAIJ_6;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2833       break;
2834     case 7:
2835       B->ops->mult    = MatMult_SeqBAIJ_7;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2837       break;
2838     case 15:
2839       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2840       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2841       break;
2842     default:
2843       B->ops->mult    = MatMult_SeqBAIJ_N;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2845       break;
2846     }
2847   }
2848   B->ops->sor = MatSOR_SeqBAIJ;
2849   b->mbs = mbs;
2850   b->nbs = nbs;
2851   if (!skipallocation) {
2852     if (!b->imax) {
2853       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2854       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2855 
2856       b->free_imax_ilen = PETSC_TRUE;
2857     }
2858     /* b->ilen will count nonzeros in each block row so far. */
2859     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2860     if (!nnz) {
2861       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2862       else if (nz < 0) nz = 1;
2863       for (i=0; i<mbs; i++) b->imax[i] = nz;
2864       nz = nz*mbs;
2865     } else {
2866       nz = 0;
2867       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2868     }
2869 
2870     /* allocate the matrix space */
2871     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2872     ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2873     ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2874     ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2875     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2876 
2877     b->singlemalloc = PETSC_TRUE;
2878     b->i[0]         = 0;
2879     for (i=1; i<mbs+1; i++) {
2880       b->i[i] = b->i[i-1] + b->imax[i-1];
2881     }
2882     b->free_a  = PETSC_TRUE;
2883     b->free_ij = PETSC_TRUE;
2884 #if defined(PETSC_THREADCOMM_ACTIVE)
2885     ierr = MatZeroEntries_SeqBAIJ(B);CHKERRQ(ierr);
2886 #endif
2887   } else {
2888     b->free_a  = PETSC_FALSE;
2889     b->free_ij = PETSC_FALSE;
2890   }
2891 
2892   b->bs2              = bs2;
2893   b->mbs              = mbs;
2894   b->nz               = 0;
2895   b->maxnz            = nz;
2896   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2897   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2898   PetscFunctionReturn(0);
2899 }
2900 
2901 #undef __FUNCT__
2902 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR_SeqBAIJ"
2903 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2904 {
2905   PetscInt       i,m,nz,nz_max=0,*nnz;
2906   PetscScalar    *values=0;
2907   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2908   PetscErrorCode ierr;
2909 
2910   PetscFunctionBegin;
2911   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2912   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2913   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2914   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2915   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2916   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2917   m    = B->rmap->n/bs;
2918 
2919   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2920   ierr = PetscMalloc1((m+1), &nnz);CHKERRQ(ierr);
2921   for (i=0; i<m; i++) {
2922     nz = ii[i+1]- ii[i];
2923     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2924     nz_max = PetscMax(nz_max, nz);
2925     nnz[i] = nz;
2926   }
2927   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2928   ierr = PetscFree(nnz);CHKERRQ(ierr);
2929 
2930   values = (PetscScalar*)V;
2931   if (!values) {
2932     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2933   }
2934   for (i=0; i<m; i++) {
2935     PetscInt          ncols  = ii[i+1] - ii[i];
2936     const PetscInt    *icols = jj + ii[i];
2937     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2938     if (!roworiented) {
2939       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2940     } else {
2941       PetscInt j;
2942       for (j=0; j<ncols; j++) {
2943         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2944         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2945       }
2946     }
2947   }
2948   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2949   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2950   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2951   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2952   PetscFunctionReturn(0);
2953 }
2954 
2955 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_petsc(Mat,MatFactorType,Mat*);
2956 PETSC_EXTERN PetscErrorCode MatGetFactor_seqbaij_bstrm(Mat,MatFactorType,Mat*);
2957 #if defined(PETSC_HAVE_MUMPS)
2958 PETSC_EXTERN PetscErrorCode MatGetFactor_baij_mumps(Mat,MatFactorType,Mat*);
2959 #endif
2960 extern PetscErrorCode  MatGetFactorAvailable_seqbaij_petsc(Mat,MatFactorType,PetscBool*);
2961 
2962 /*MC
2963    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2964    block sparse compressed row format.
2965 
2966    Options Database Keys:
2967 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
2968 
2969   Level: beginner
2970 
2971 .seealso: MatCreateSeqBAIJ()
2972 M*/
2973 
2974 PETSC_EXTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
2975 
2976 #undef __FUNCT__
2977 #define __FUNCT__ "MatCreate_SeqBAIJ"
2978 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
2979 {
2980   PetscErrorCode ierr;
2981   PetscMPIInt    size;
2982   Mat_SeqBAIJ    *b;
2983 
2984   PetscFunctionBegin;
2985   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
2986   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
2987 
2988   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
2989   B->data = (void*)b;
2990   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
2991 
2992   b->row          = 0;
2993   b->col          = 0;
2994   b->icol         = 0;
2995   b->reallocs     = 0;
2996   b->saved_values = 0;
2997 
2998   b->roworiented        = PETSC_TRUE;
2999   b->nonew              = 0;
3000   b->diag               = 0;
3001   b->solve_work         = 0;
3002   b->mult_work          = 0;
3003   B->spptr              = 0;
3004   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3005   b->keepnonzeropattern = PETSC_FALSE;
3006   b->xtoy               = 0;
3007   b->XtoY               = 0;
3008   B->same_nonzero       = PETSC_FALSE;
3009 
3010   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactorAvailable_petsc_C",MatGetFactorAvailable_seqbaij_petsc);CHKERRQ(ierr);
3011   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_petsc_C",MatGetFactor_seqbaij_petsc);CHKERRQ(ierr);
3012   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_bstrm_C",MatGetFactor_seqbaij_bstrm);CHKERRQ(ierr);
3013 #if defined(PETSC_HAVE_MUMPS)
3014   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C", MatGetFactor_baij_mumps);CHKERRQ(ierr);
3015 #endif
3016   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3017   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3018   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3019   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3020   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3021   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3022   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3023   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3024   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqbstrm_C",MatConvert_SeqBAIJ_SeqBSTRM);CHKERRQ(ierr);
3025   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3026   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3027   PetscFunctionReturn(0);
3028 }
3029 
3030 #undef __FUNCT__
3031 #define __FUNCT__ "MatDuplicateNoCreate_SeqBAIJ"
3032 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3033 {
3034   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3035   PetscErrorCode ierr;
3036   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3037 
3038   PetscFunctionBegin;
3039   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3040 
3041   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3042     c->imax           = a->imax;
3043     c->ilen           = a->ilen;
3044     c->free_imax_ilen = PETSC_FALSE;
3045   } else {
3046     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3047     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3048     for (i=0; i<mbs; i++) {
3049       c->imax[i] = a->imax[i];
3050       c->ilen[i] = a->ilen[i];
3051     }
3052     c->free_imax_ilen = PETSC_TRUE;
3053   }
3054 
3055   /* allocate the matrix space */
3056   if (mallocmatspace) {
3057     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3058       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3059       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3060 
3061       c->i            = a->i;
3062       c->j            = a->j;
3063       c->singlemalloc = PETSC_FALSE;
3064       c->free_a       = PETSC_TRUE;
3065       c->free_ij      = PETSC_FALSE;
3066       c->parent       = A;
3067       C->preallocated = PETSC_TRUE;
3068       C->assembled    = PETSC_TRUE;
3069 
3070       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3071       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3072       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3073     } else {
3074       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3075       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3076 
3077       c->singlemalloc = PETSC_TRUE;
3078       c->free_a       = PETSC_TRUE;
3079       c->free_ij      = PETSC_TRUE;
3080 
3081       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3082       if (mbs > 0) {
3083         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3084         if (cpvalues == MAT_COPY_VALUES) {
3085           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3086         } else {
3087           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3088         }
3089       }
3090       C->preallocated = PETSC_TRUE;
3091       C->assembled    = PETSC_TRUE;
3092     }
3093   }
3094 
3095   c->roworiented = a->roworiented;
3096   c->nonew       = a->nonew;
3097 
3098   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3099   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3100 
3101   c->bs2         = a->bs2;
3102   c->mbs         = a->mbs;
3103   c->nbs         = a->nbs;
3104 
3105   if (a->diag) {
3106     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3107       c->diag      = a->diag;
3108       c->free_diag = PETSC_FALSE;
3109     } else {
3110       ierr = PetscMalloc1((mbs+1),&c->diag);CHKERRQ(ierr);
3111       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3112       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3113       c->free_diag = PETSC_TRUE;
3114     }
3115   } else c->diag = 0;
3116 
3117   c->nz         = a->nz;
3118   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3119   c->solve_work = 0;
3120   c->mult_work  = 0;
3121 
3122   c->compressedrow.use   = a->compressedrow.use;
3123   c->compressedrow.nrows = a->compressedrow.nrows;
3124   if (a->compressedrow.use) {
3125     i    = a->compressedrow.nrows;
3126     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3127     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3128     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3129     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3130   } else {
3131     c->compressedrow.use    = PETSC_FALSE;
3132     c->compressedrow.i      = NULL;
3133     c->compressedrow.rindex = NULL;
3134   }
3135   C->same_nonzero = A->same_nonzero;
3136 
3137   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3138   ierr = PetscMemcpy(C->ops,A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
3139   PetscFunctionReturn(0);
3140 }
3141 
3142 #undef __FUNCT__
3143 #define __FUNCT__ "MatDuplicate_SeqBAIJ"
3144 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3145 {
3146   PetscErrorCode ierr;
3147 
3148   PetscFunctionBegin;
3149   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3150   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3151   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3152   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3153   PetscFunctionReturn(0);
3154 }
3155 
3156 #undef __FUNCT__
3157 #define __FUNCT__ "MatLoad_SeqBAIJ"
3158 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3159 {
3160   Mat_SeqBAIJ    *a;
3161   PetscErrorCode ierr;
3162   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs=1;
3163   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3164   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3165   PetscInt       *masked,nmask,tmp,bs2,ishift;
3166   PetscMPIInt    size;
3167   int            fd;
3168   PetscScalar    *aa;
3169   MPI_Comm       comm;
3170 
3171   PetscFunctionBegin;
3172   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3173   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3174   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3175   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3176   bs2  = bs*bs;
3177 
3178   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3179   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3180   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3181   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3182   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3183   M = header[1]; N = header[2]; nz = header[3];
3184 
3185   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3186   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3187 
3188   /*
3189      This code adds extra rows to make sure the number of rows is
3190     divisible by the blocksize
3191   */
3192   mbs        = M/bs;
3193   extra_rows = bs - M + bs*(mbs);
3194   if (extra_rows == bs) extra_rows = 0;
3195   else mbs++;
3196   if (extra_rows) {
3197     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3198   }
3199 
3200   /* Set global sizes if not already set */
3201   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3202     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3203   } else { /* Check if the matrix global sizes are correct */
3204     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3205     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3206       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3207     }
3208     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3209   }
3210 
3211   /* read in row lengths */
3212   ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
3213   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3214   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3215 
3216   /* read in column indices */
3217   ierr = PetscMalloc1((nz+extra_rows),&jj);CHKERRQ(ierr);
3218   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3219   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3220 
3221   /* loop over row lengths determining block row lengths */
3222   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3223   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3224   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3225   rowcount = 0;
3226   nzcount  = 0;
3227   for (i=0; i<mbs; i++) {
3228     nmask = 0;
3229     for (j=0; j<bs; j++) {
3230       kmax = rowlengths[rowcount];
3231       for (k=0; k<kmax; k++) {
3232         tmp = jj[nzcount++]/bs;
3233         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3234       }
3235       rowcount++;
3236     }
3237     browlengths[i] += nmask;
3238     /* zero out the mask elements we set */
3239     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3240   }
3241 
3242   /* Do preallocation  */
3243   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(newmat,bs,0,browlengths);CHKERRQ(ierr);
3244   a    = (Mat_SeqBAIJ*)newmat->data;
3245 
3246   /* set matrix "i" values */
3247   a->i[0] = 0;
3248   for (i=1; i<= mbs; i++) {
3249     a->i[i]      = a->i[i-1] + browlengths[i-1];
3250     a->ilen[i-1] = browlengths[i-1];
3251   }
3252   a->nz = 0;
3253   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3254 
3255   /* read in nonzero values */
3256   ierr = PetscMalloc1((nz+extra_rows),&aa);CHKERRQ(ierr);
3257   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3258   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3259 
3260   /* set "a" and "j" values into matrix */
3261   nzcount = 0; jcount = 0;
3262   for (i=0; i<mbs; i++) {
3263     nzcountb = nzcount;
3264     nmask    = 0;
3265     for (j=0; j<bs; j++) {
3266       kmax = rowlengths[i*bs+j];
3267       for (k=0; k<kmax; k++) {
3268         tmp = jj[nzcount++]/bs;
3269         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3270       }
3271     }
3272     /* sort the masked values */
3273     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3274 
3275     /* set "j" values into matrix */
3276     maskcount = 1;
3277     for (j=0; j<nmask; j++) {
3278       a->j[jcount++]  = masked[j];
3279       mask[masked[j]] = maskcount++;
3280     }
3281     /* set "a" values into matrix */
3282     ishift = bs2*a->i[i];
3283     for (j=0; j<bs; j++) {
3284       kmax = rowlengths[i*bs+j];
3285       for (k=0; k<kmax; k++) {
3286         tmp       = jj[nzcountb]/bs;
3287         block     = mask[tmp] - 1;
3288         point     = jj[nzcountb] - bs*tmp;
3289         idx       = ishift + bs2*block + j + bs*point;
3290         a->a[idx] = (MatScalar)aa[nzcountb++];
3291       }
3292     }
3293     /* zero out the mask elements we set */
3294     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3295   }
3296   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3297 
3298   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3299   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3300   ierr = PetscFree(aa);CHKERRQ(ierr);
3301   ierr = PetscFree(jj);CHKERRQ(ierr);
3302   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3303 
3304   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3305   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3306   PetscFunctionReturn(0);
3307 }
3308 
3309 #undef __FUNCT__
3310 #define __FUNCT__ "MatCreateSeqBAIJ"
3311 /*@C
3312    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3313    compressed row) format.  For good matrix assembly performance the
3314    user should preallocate the matrix storage by setting the parameter nz
3315    (or the array nnz).  By setting these parameters accurately, performance
3316    during matrix assembly can be increased by more than a factor of 50.
3317 
3318    Collective on MPI_Comm
3319 
3320    Input Parameters:
3321 +  comm - MPI communicator, set to PETSC_COMM_SELF
3322 .  bs - size of block
3323 .  m - number of rows
3324 .  n - number of columns
3325 .  nz - number of nonzero blocks  per block row (same for all rows)
3326 -  nnz - array containing the number of nonzero blocks in the various block rows
3327          (possibly different for each block row) or NULL
3328 
3329    Output Parameter:
3330 .  A - the matrix
3331 
3332    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3333    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3334    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3335 
3336    Options Database Keys:
3337 .   -mat_no_unroll - uses code that does not unroll the loops in the
3338                      block calculations (much slower)
3339 .    -mat_block_size - size of the blocks to use
3340 
3341    Level: intermediate
3342 
3343    Notes:
3344    The number of rows and columns must be divisible by blocksize.
3345 
3346    If the nnz parameter is given then the nz parameter is ignored
3347 
3348    A nonzero block is any block that as 1 or more nonzeros in it
3349 
3350    The block AIJ format is fully compatible with standard Fortran 77
3351    storage.  That is, the stored row and column indices can begin at
3352    either one (as in Fortran) or zero.  See the users' manual for details.
3353 
3354    Specify the preallocated storage with either nz or nnz (not both).
3355    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3356    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3357    matrices.
3358 
3359 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3360 @*/
3361 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3362 {
3363   PetscErrorCode ierr;
3364 
3365   PetscFunctionBegin;
3366   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3367   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3368   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3369   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3370   PetscFunctionReturn(0);
3371 }
3372 
3373 #undef __FUNCT__
3374 #define __FUNCT__ "MatSeqBAIJSetPreallocation"
3375 /*@C
3376    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3377    per row in the matrix. For good matrix assembly performance the
3378    user should preallocate the matrix storage by setting the parameter nz
3379    (or the array nnz).  By setting these parameters accurately, performance
3380    during matrix assembly can be increased by more than a factor of 50.
3381 
3382    Collective on MPI_Comm
3383 
3384    Input Parameters:
3385 +  A - the matrix
3386 .  bs - size of block
3387 .  nz - number of block nonzeros per block row (same for all rows)
3388 -  nnz - array containing the number of block nonzeros in the various block rows
3389          (possibly different for each block row) or NULL
3390 
3391    Options Database Keys:
3392 .   -mat_no_unroll - uses code that does not unroll the loops in the
3393                      block calculations (much slower)
3394 .    -mat_block_size - size of the blocks to use
3395 
3396    Level: intermediate
3397 
3398    Notes:
3399    If the nnz parameter is given then the nz parameter is ignored
3400 
3401    You can call MatGetInfo() to get information on how effective the preallocation was;
3402    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3403    You can also run with the option -info and look for messages with the string
3404    malloc in them to see if additional memory allocation was needed.
3405 
3406    The block AIJ format is fully compatible with standard Fortran 77
3407    storage.  That is, the stored row and column indices can begin at
3408    either one (as in Fortran) or zero.  See the users' manual for details.
3409 
3410    Specify the preallocated storage with either nz or nnz (not both).
3411    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3412    allocation.  See the <A href="../../docs/manual.pdf#nameddest=ch_mat">Mat chapter of the users manual</A> for details.
3413 
3414 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3415 @*/
3416 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3417 {
3418   PetscErrorCode ierr;
3419 
3420   PetscFunctionBegin;
3421   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3422   PetscValidType(B,1);
3423   PetscValidLogicalCollectiveInt(B,bs,2);
3424   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3425   PetscFunctionReturn(0);
3426 }
3427 
3428 #undef __FUNCT__
3429 #define __FUNCT__ "MatSeqBAIJSetPreallocationCSR"
3430 /*@C
3431    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3432    (the default sequential PETSc format).
3433 
3434    Collective on MPI_Comm
3435 
3436    Input Parameters:
3437 +  A - the matrix
3438 .  i - the indices into j for the start of each local row (starts with zero)
3439 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3440 -  v - optional values in the matrix
3441 
3442    Level: developer
3443 
3444    Notes:
3445    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3446    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3447    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3448    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3449    block column and the second index is over columns within a block.
3450 
3451 .keywords: matrix, aij, compressed row, sparse
3452 
3453 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3454 @*/
3455 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3456 {
3457   PetscErrorCode ierr;
3458 
3459   PetscFunctionBegin;
3460   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3461   PetscValidType(B,1);
3462   PetscValidLogicalCollectiveInt(B,bs,2);
3463   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3464   PetscFunctionReturn(0);
3465 }
3466 
3467 
3468 #undef __FUNCT__
3469 #define __FUNCT__ "MatCreateSeqBAIJWithArrays"
3470 /*@
3471      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3472 
3473      Collective on MPI_Comm
3474 
3475    Input Parameters:
3476 +  comm - must be an MPI communicator of size 1
3477 .  bs - size of block
3478 .  m - number of rows
3479 .  n - number of columns
3480 .  i - row indices
3481 .  j - column indices
3482 -  a - matrix values
3483 
3484    Output Parameter:
3485 .  mat - the matrix
3486 
3487    Level: advanced
3488 
3489    Notes:
3490        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3491     once the matrix is destroyed
3492 
3493        You cannot set new nonzero locations into this matrix, that will generate an error.
3494 
3495        The i and j indices are 0 based
3496 
3497        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3498 
3499       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3500       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3501       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3502       with column-major ordering within blocks.
3503 
3504 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3505 
3506 @*/
3507 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt *i,PetscInt *j,PetscScalar *a,Mat *mat)
3508 {
3509   PetscErrorCode ierr;
3510   PetscInt       ii;
3511   Mat_SeqBAIJ    *baij;
3512 
3513   PetscFunctionBegin;
3514   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3515   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3516 
3517   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3518   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3519   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3520   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3521   baij = (Mat_SeqBAIJ*)(*mat)->data;
3522   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3523   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3524 
3525   baij->i = i;
3526   baij->j = j;
3527   baij->a = a;
3528 
3529   baij->singlemalloc = PETSC_FALSE;
3530   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3531   baij->free_a       = PETSC_FALSE;
3532   baij->free_ij      = PETSC_FALSE;
3533 
3534   for (ii=0; ii<m; ii++) {
3535     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3536 #if defined(PETSC_USE_DEBUG)
3537     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3538 #endif
3539   }
3540 #if defined(PETSC_USE_DEBUG)
3541   for (ii=0; ii<baij->i[m]; ii++) {
3542     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3543     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3544   }
3545 #endif
3546 
3547   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3548   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3549   PetscFunctionReturn(0);
3550 }
3551