xref: /petsc/src/mat/impls/baij/seq/baij.c (revision d2fd7bfc6f0fd2e1d083decbb7cc7d77e16824f0)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912   PetscErrorCode    ierr;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953       }
954       if (N >= i) {
955         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   for (k=0; k<m; k++) { /* loop over added rows */
991     row  = im[k]; brow = row/4;
992     rp   = aj + ai[brow];
993     ap   = aa + 16*ai[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     high = nrow;
997     for (l=0; l<n; l++) { /* loop over added columns */
998       col   = in[l]; bcol = col/4;
999       ridx  = row % 4; cidx = col % 4;
1000       value = v[l + k*n];
1001       if (col <= lastcol)  low = 0;
1002       else                high = nrow;
1003       lastcol = col;
1004       while (high-low > 7) {
1005         t = (low+high)/2;
1006         if (rp[t] > bcol) high = t;
1007         else              low  = t;
1008       }
1009       for (i=low; i<high; i++) {
1010         if (rp[i] > bcol) break;
1011         if (rp[i] == bcol) {
1012           bap   = ap +  16*i + 4*cidx + ridx;
1013           *bap += value;
1014           goto noinsert1;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1049   } else {
1050     PetscInt n;
1051     n = PetscMin(a->mbs, a->nbs);
1052     diag = a->diag;
1053     for (i=0; i<n; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1226 #if defined(PETSC_HAVE_HYPRE)
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1228 #endif
1229   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1230   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1235 {
1236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_ROW_ORIENTED:
1242     a->roworiented = flg;
1243     break;
1244   case MAT_KEEP_NONZERO_PATTERN:
1245     a->keepnonzeropattern = flg;
1246     break;
1247   case MAT_NEW_NONZERO_LOCATIONS:
1248     a->nonew = (flg ? 0 : 1);
1249     break;
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     a->nonew = (flg ? -1 : 0);
1252     break;
1253   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1254     a->nonew = (flg ? -2 : 0);
1255     break;
1256   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1257     a->nounused = (flg ? -1 : 0);
1258     break;
1259   case MAT_NEW_DIAGONALS:
1260   case MAT_IGNORE_OFF_PROC_ENTRIES:
1261   case MAT_USE_HASH_TABLE:
1262   case MAT_SORTED_FULL:
1263     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1264     break;
1265   case MAT_SPD:
1266   case MAT_SYMMETRIC:
1267   case MAT_STRUCTURALLY_SYMMETRIC:
1268   case MAT_HERMITIAN:
1269   case MAT_SYMMETRY_ETERNAL:
1270   case MAT_SUBMAT_SINGLEIS:
1271   case MAT_STRUCTURE_ONLY:
1272     /* These options are handled directly by MatSetOption() */
1273     break;
1274   default:
1275     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1276   }
1277   PetscFunctionReturn(0);
1278 }
1279 
1280 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1281 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1282 {
1283   PetscErrorCode ierr;
1284   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1285   MatScalar      *aa_i;
1286   PetscScalar    *v_i;
1287 
1288   PetscFunctionBegin;
1289   bs  = A->rmap->bs;
1290   bs2 = bs*bs;
1291   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1292 
1293   bn  = row/bs;   /* Block number */
1294   bp  = row % bs; /* Block Position */
1295   M   = ai[bn+1] - ai[bn];
1296   *nz = bs*M;
1297 
1298   if (v) {
1299     *v = 0;
1300     if (*nz) {
1301       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1302       for (i=0; i<M; i++) { /* for each block in the block row */
1303         v_i  = *v + i*bs;
1304         aa_i = aa + bs2*(ai[bn] + i);
1305         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1306       }
1307     }
1308   }
1309 
1310   if (idx) {
1311     *idx = 0;
1312     if (*nz) {
1313       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1314       for (i=0; i<M; i++) { /* for each block in the block row */
1315         idx_i = *idx + i*bs;
1316         itmp  = bs*aj[ai[bn] + i];
1317         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1318       }
1319     }
1320   }
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1325 {
1326   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1331   PetscFunctionReturn(0);
1332 }
1333 
1334 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1335 {
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1340   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1345 {
1346   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1347   Mat            C;
1348   PetscErrorCode ierr;
1349   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1350   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1351   MatScalar      *ata,*aa=a->a;
1352 
1353   PetscFunctionBegin;
1354   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1355   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1356     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1357 
1358     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1359     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1360     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1361     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1362 
1363     at  = (Mat_SeqBAIJ*)C->data;
1364     ati = at->i;
1365     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1366   } else {
1367     C = *B;
1368     at = (Mat_SeqBAIJ*)C->data;
1369     ati = at->i;
1370   }
1371 
1372   atj = at->j;
1373   ata = at->a;
1374 
1375   /* Copy ati into atfill so we have locations of the next free space in atj */
1376   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1377 
1378   /* Walk through A row-wise and mark nonzero entries of A^T. */
1379   for (i=0; i<mbs; i++) {
1380     anzj = ai[i+1] - ai[i];
1381     for (j=0; j<anzj; j++) {
1382       atj[atfill[*aj]] = i;
1383       for (kr=0; kr<bs; kr++) {
1384         for (k=0; k<bs; k++) {
1385           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1386         }
1387       }
1388       atfill[*aj++] += 1;
1389     }
1390   }
1391   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1393 
1394   /* Clean up temporary space and complete requests. */
1395   ierr = PetscFree(atfill);CHKERRQ(ierr);
1396 
1397   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1398     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1399     *B = C;
1400   } else {
1401     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1407 {
1408   PetscErrorCode ierr;
1409   Mat            Btrans;
1410 
1411   PetscFunctionBegin;
1412   *f   = PETSC_FALSE;
1413   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1414   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1415   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1420 {
1421   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1422   PetscErrorCode ierr;
1423   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1424   int            fd;
1425   PetscScalar    *aa;
1426   FILE           *file;
1427 
1428   PetscFunctionBegin;
1429   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1430   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1431   col_lens[0] = MAT_FILE_CLASSID;
1432 
1433   col_lens[1] = A->rmap->N;
1434   col_lens[2] = A->cmap->n;
1435   col_lens[3] = a->nz*bs2;
1436 
1437   /* store lengths of each row and write (including header) to file */
1438   count = 0;
1439   for (i=0; i<a->mbs; i++) {
1440     for (j=0; j<bs; j++) {
1441       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1442     }
1443   }
1444   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1445   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1446 
1447   /* store column indices (zero start index) */
1448   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1449   count = 0;
1450   for (i=0; i<a->mbs; i++) {
1451     for (j=0; j<bs; j++) {
1452       for (k=a->i[i]; k<a->i[i+1]; k++) {
1453         for (l=0; l<bs; l++) {
1454           jj[count++] = bs*a->j[k] + l;
1455         }
1456       }
1457     }
1458   }
1459   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1460   ierr = PetscFree(jj);CHKERRQ(ierr);
1461 
1462   /* store nonzero values */
1463   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1464   count = 0;
1465   for (i=0; i<a->mbs; i++) {
1466     for (j=0; j<bs; j++) {
1467       for (k=a->i[i]; k<a->i[i+1]; k++) {
1468         for (l=0; l<bs; l++) {
1469           aa[count++] = a->a[bs2*k + l*bs + j];
1470         }
1471       }
1472     }
1473   }
1474   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1475   ierr = PetscFree(aa);CHKERRQ(ierr);
1476 
1477   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1478   if (file) {
1479     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1480   }
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1485 {
1486   PetscErrorCode ierr;
1487   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1488   PetscInt       i,bs = A->rmap->bs,k;
1489 
1490   PetscFunctionBegin;
1491   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1492   for (i=0; i<a->mbs; i++) {
1493     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1494     for (k=a->i[i]; k<a->i[i+1]; k++) {
1495       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1496     }
1497     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1498   }
1499   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1500   PetscFunctionReturn(0);
1501 }
1502 
1503 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1504 {
1505   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1506   PetscErrorCode    ierr;
1507   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1508   PetscViewerFormat format;
1509 
1510   PetscFunctionBegin;
1511   if (A->structure_only) {
1512     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1513     PetscFunctionReturn(0);
1514   }
1515 
1516   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1517   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1518     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1520     const char *matname;
1521     Mat        aij;
1522     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1523     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1524     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1525     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1526     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1527   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1528       PetscFunctionReturn(0);
1529   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1530     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1531     for (i=0; i<a->mbs; i++) {
1532       for (j=0; j<bs; j++) {
1533         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1534         for (k=a->i[i]; k<a->i[i+1]; k++) {
1535           for (l=0; l<bs; l++) {
1536 #if defined(PETSC_USE_COMPLEX)
1537             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1542                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1545             }
1546 #else
1547             if (a->a[bs2*k + l*bs + j] != 0.0) {
1548               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1549             }
1550 #endif
1551           }
1552         }
1553         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1554       }
1555     }
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1557   } else {
1558     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1559     for (i=0; i<a->mbs; i++) {
1560       for (j=0; j<bs; j++) {
1561         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1562         for (k=a->i[i]; k<a->i[i+1]; k++) {
1563           for (l=0; l<bs; l++) {
1564 #if defined(PETSC_USE_COMPLEX)
1565             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1567                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1570                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             } else {
1572               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1573             }
1574 #else
1575             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1576 #endif
1577           }
1578         }
1579         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1580       }
1581     }
1582     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1583   }
1584   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1585   PetscFunctionReturn(0);
1586 }
1587 
1588 #include <petscdraw.h>
1589 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1590 {
1591   Mat               A = (Mat) Aa;
1592   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1593   PetscErrorCode    ierr;
1594   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1595   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1596   MatScalar         *aa;
1597   PetscViewer       viewer;
1598   PetscViewerFormat format;
1599 
1600   PetscFunctionBegin;
1601   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1602   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1603   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1604 
1605   /* loop over matrix elements drawing boxes */
1606 
1607   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1608     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1609     /* Blue for negative, Cyan for zero and  Red for positive */
1610     color = PETSC_DRAW_BLUE;
1611     for (i=0,row=0; i<mbs; i++,row+=bs) {
1612       for (j=a->i[i]; j<a->i[i+1]; j++) {
1613         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1614         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1615         aa  = a->a + j*bs2;
1616         for (k=0; k<bs; k++) {
1617           for (l=0; l<bs; l++) {
1618             if (PetscRealPart(*aa++) >=  0.) continue;
1619             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1620           }
1621         }
1622       }
1623     }
1624     color = PETSC_DRAW_CYAN;
1625     for (i=0,row=0; i<mbs; i++,row+=bs) {
1626       for (j=a->i[i]; j<a->i[i+1]; j++) {
1627         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1628         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1629         aa  = a->a + j*bs2;
1630         for (k=0; k<bs; k++) {
1631           for (l=0; l<bs; l++) {
1632             if (PetscRealPart(*aa++) != 0.) continue;
1633             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1634           }
1635         }
1636       }
1637     }
1638     color = PETSC_DRAW_RED;
1639     for (i=0,row=0; i<mbs; i++,row+=bs) {
1640       for (j=a->i[i]; j<a->i[i+1]; j++) {
1641         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1642         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1643         aa  = a->a + j*bs2;
1644         for (k=0; k<bs; k++) {
1645           for (l=0; l<bs; l++) {
1646             if (PetscRealPart(*aa++) <= 0.) continue;
1647             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1648           }
1649         }
1650       }
1651     }
1652     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1653   } else {
1654     /* use contour shading to indicate magnitude of values */
1655     /* first determine max of all nonzero values */
1656     PetscReal minv = 0.0, maxv = 0.0;
1657     PetscDraw popup;
1658 
1659     for (i=0; i<a->nz*a->bs2; i++) {
1660       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1661     }
1662     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1663     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1664     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1665 
1666     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1667     for (i=0,row=0; i<mbs; i++,row+=bs) {
1668       for (j=a->i[i]; j<a->i[i+1]; j++) {
1669         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1670         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1671         aa  = a->a + j*bs2;
1672         for (k=0; k<bs; k++) {
1673           for (l=0; l<bs; l++) {
1674             MatScalar v = *aa++;
1675             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1676             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1677           }
1678         }
1679       }
1680     }
1681     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1682   }
1683   PetscFunctionReturn(0);
1684 }
1685 
1686 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1687 {
1688   PetscErrorCode ierr;
1689   PetscReal      xl,yl,xr,yr,w,h;
1690   PetscDraw      draw;
1691   PetscBool      isnull;
1692 
1693   PetscFunctionBegin;
1694   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1695   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1696   if (isnull) PetscFunctionReturn(0);
1697 
1698   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1699   xr  += w;          yr += h;        xl = -w;     yl = -h;
1700   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1701   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1702   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1703   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1704   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1705   PetscFunctionReturn(0);
1706 }
1707 
1708 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1709 {
1710   PetscErrorCode ierr;
1711   PetscBool      iascii,isbinary,isdraw;
1712 
1713   PetscFunctionBegin;
1714   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1715   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1716   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1717   if (iascii) {
1718     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1719   } else if (isbinary) {
1720     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1721   } else if (isdraw) {
1722     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1723   } else {
1724     Mat B;
1725     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1726     ierr = MatView(B,viewer);CHKERRQ(ierr);
1727     ierr = MatDestroy(&B);CHKERRQ(ierr);
1728   }
1729   PetscFunctionReturn(0);
1730 }
1731 
1732 
1733 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1734 {
1735   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1736   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1737   PetscInt    *ai = a->i,*ailen = a->ilen;
1738   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1739   MatScalar   *ap,*aa = a->a;
1740 
1741   PetscFunctionBegin;
1742   for (k=0; k<m; k++) { /* loop over rows */
1743     row = im[k]; brow = row/bs;
1744     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1745     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1746     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1747     nrow = ailen[brow];
1748     for (l=0; l<n; l++) { /* loop over columns */
1749       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1750       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1751       col  = in[l];
1752       bcol = col/bs;
1753       cidx = col%bs;
1754       ridx = row%bs;
1755       high = nrow;
1756       low  = 0; /* assume unsorted */
1757       while (high-low > 5) {
1758         t = (low+high)/2;
1759         if (rp[t] > bcol) high = t;
1760         else             low  = t;
1761       }
1762       for (i=low; i<high; i++) {
1763         if (rp[i] > bcol) break;
1764         if (rp[i] == bcol) {
1765           *v++ = ap[bs2*i+bs*cidx+ridx];
1766           goto finished;
1767         }
1768       }
1769       *v++ = 0.0;
1770 finished:;
1771     }
1772   }
1773   PetscFunctionReturn(0);
1774 }
1775 
1776 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1777 {
1778   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1779   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1780   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1781   PetscErrorCode    ierr;
1782   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1783   PetscBool         roworiented=a->roworiented;
1784   const PetscScalar *value     = v;
1785   MatScalar         *ap=NULL,*aa = a->a,*bap;
1786 
1787   PetscFunctionBegin;
1788   if (roworiented) {
1789     stepval = (n-1)*bs;
1790   } else {
1791     stepval = (m-1)*bs;
1792   }
1793   for (k=0; k<m; k++) { /* loop over added rows */
1794     row = im[k];
1795     if (row < 0) continue;
1796 #if defined(PETSC_USE_DEBUG)
1797     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1798 #endif
1799     rp   = aj + ai[row];
1800     if (!A->structure_only) ap = aa + bs2*ai[row];
1801     rmax = imax[row];
1802     nrow = ailen[row];
1803     low  = 0;
1804     high = nrow;
1805     for (l=0; l<n; l++) { /* loop over added columns */
1806       if (in[l] < 0) continue;
1807 #if defined(PETSC_USE_DEBUG)
1808       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1809 #endif
1810       col = in[l];
1811       if (!A->structure_only) {
1812         if (roworiented) {
1813           value = v + (k*(stepval+bs) + l)*bs;
1814         } else {
1815           value = v + (l*(stepval+bs) + k)*bs;
1816         }
1817       }
1818       if (col <= lastcol) low = 0;
1819       else high = nrow;
1820       lastcol = col;
1821       while (high-low > 7) {
1822         t = (low+high)/2;
1823         if (rp[t] > col) high = t;
1824         else             low  = t;
1825       }
1826       for (i=low; i<high; i++) {
1827         if (rp[i] > col) break;
1828         if (rp[i] == col) {
1829           if (A->structure_only) goto noinsert2;
1830           bap = ap +  bs2*i;
1831           if (roworiented) {
1832             if (is == ADD_VALUES) {
1833               for (ii=0; ii<bs; ii++,value+=stepval) {
1834                 for (jj=ii; jj<bs2; jj+=bs) {
1835                   bap[jj] += *value++;
1836                 }
1837               }
1838             } else {
1839               for (ii=0; ii<bs; ii++,value+=stepval) {
1840                 for (jj=ii; jj<bs2; jj+=bs) {
1841                   bap[jj] = *value++;
1842                 }
1843               }
1844             }
1845           } else {
1846             if (is == ADD_VALUES) {
1847               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1848                 for (jj=0; jj<bs; jj++) {
1849                   bap[jj] += value[jj];
1850                 }
1851                 bap += bs;
1852               }
1853             } else {
1854               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1855                 for (jj=0; jj<bs; jj++) {
1856                   bap[jj]  = value[jj];
1857                 }
1858                 bap += bs;
1859               }
1860             }
1861           }
1862           goto noinsert2;
1863         }
1864       }
1865       if (nonew == 1) goto noinsert2;
1866       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1867       if (A->structure_only) {
1868         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1869       } else {
1870         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1871       }
1872       N = nrow++ - 1; high++;
1873       /* shift up all the later entries in this row */
1874       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1875       rp[i] = col;
1876       if (!A->structure_only) {
1877         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1878         bap   = ap +  bs2*i;
1879         if (roworiented) {
1880           for (ii=0; ii<bs; ii++,value+=stepval) {
1881             for (jj=ii; jj<bs2; jj+=bs) {
1882               bap[jj] = *value++;
1883             }
1884           }
1885         } else {
1886           for (ii=0; ii<bs; ii++,value+=stepval) {
1887             for (jj=0; jj<bs; jj++) {
1888               *bap++ = *value++;
1889             }
1890           }
1891         }
1892       }
1893 noinsert2:;
1894       low = i;
1895     }
1896     ailen[row] = nrow;
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1902 {
1903   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1904   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1905   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1906   PetscErrorCode ierr;
1907   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1908   MatScalar      *aa  = a->a,*ap;
1909   PetscReal      ratio=0.6;
1910 
1911   PetscFunctionBegin;
1912   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1913 
1914   if (m) rmax = ailen[0];
1915   for (i=1; i<mbs; i++) {
1916     /* move each row back by the amount of empty slots (fshift) before it*/
1917     fshift += imax[i-1] - ailen[i-1];
1918     rmax    = PetscMax(rmax,ailen[i]);
1919     if (fshift) {
1920       ip = aj + ai[i];
1921       ap = aa + bs2*ai[i];
1922       N  = ailen[i];
1923       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1924       if (!A->structure_only) {
1925         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1926       }
1927     }
1928     ai[i] = ai[i-1] + ailen[i-1];
1929   }
1930   if (mbs) {
1931     fshift += imax[mbs-1] - ailen[mbs-1];
1932     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1933   }
1934 
1935   /* reset ilen and imax for each row */
1936   a->nonzerorowcnt = 0;
1937   if (A->structure_only) {
1938     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1939   } else { /* !A->structure_only */
1940     for (i=0; i<mbs; i++) {
1941       ailen[i] = imax[i] = ai[i+1] - ai[i];
1942       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1943     }
1944   }
1945   a->nz = ai[mbs];
1946 
1947   /* diagonals may have moved, so kill the diagonal pointers */
1948   a->idiagvalid = PETSC_FALSE;
1949   if (fshift && a->diag) {
1950     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1951     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1952     a->diag = 0;
1953   }
1954   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1955   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1956   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1957   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1958 
1959   A->info.mallocs    += a->reallocs;
1960   a->reallocs         = 0;
1961   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1962   a->rmax             = rmax;
1963 
1964   if (!A->structure_only) {
1965     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1966   }
1967   PetscFunctionReturn(0);
1968 }
1969 
1970 /*
1971    This function returns an array of flags which indicate the locations of contiguous
1972    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1973    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1974    Assume: sizes should be long enough to hold all the values.
1975 */
1976 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1977 {
1978   PetscInt  i,j,k,row;
1979   PetscBool flg;
1980 
1981   PetscFunctionBegin;
1982   for (i=0,j=0; i<n; j++) {
1983     row = idx[i];
1984     if (row%bs!=0) { /* Not the begining of a block */
1985       sizes[j] = 1;
1986       i++;
1987     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1988       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1989       i++;
1990     } else { /* Begining of the block, so check if the complete block exists */
1991       flg = PETSC_TRUE;
1992       for (k=1; k<bs; k++) {
1993         if (row+k != idx[i+k]) { /* break in the block */
1994           flg = PETSC_FALSE;
1995           break;
1996         }
1997       }
1998       if (flg) { /* No break in the bs */
1999         sizes[j] = bs;
2000         i       += bs;
2001       } else {
2002         sizes[j] = 1;
2003         i++;
2004       }
2005     }
2006   }
2007   *bs_max = j;
2008   PetscFunctionReturn(0);
2009 }
2010 
2011 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2012 {
2013   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2014   PetscErrorCode    ierr;
2015   PetscInt          i,j,k,count,*rows;
2016   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2017   PetscScalar       zero = 0.0;
2018   MatScalar         *aa;
2019   const PetscScalar *xx;
2020   PetscScalar       *bb;
2021 
2022   PetscFunctionBegin;
2023   /* fix right hand side if needed */
2024   if (x && b) {
2025     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2026     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2027     for (i=0; i<is_n; i++) {
2028       bb[is_idx[i]] = diag*xx[is_idx[i]];
2029     }
2030     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2031     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2032   }
2033 
2034   /* Make a copy of the IS and  sort it */
2035   /* allocate memory for rows,sizes */
2036   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2037 
2038   /* copy IS values to rows, and sort them */
2039   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2040   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2041 
2042   if (baij->keepnonzeropattern) {
2043     for (i=0; i<is_n; i++) sizes[i] = 1;
2044     bs_max          = is_n;
2045   } else {
2046     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2047     A->nonzerostate++;
2048   }
2049 
2050   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2051     row = rows[j];
2052     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2053     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2054     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2055     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2056       if (diag != (PetscScalar)0.0) {
2057         if (baij->ilen[row/bs] > 0) {
2058           baij->ilen[row/bs]       = 1;
2059           baij->j[baij->i[row/bs]] = row/bs;
2060 
2061           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2062         }
2063         /* Now insert all the diagonal values for this bs */
2064         for (k=0; k<bs; k++) {
2065           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2066         }
2067       } else { /* (diag == 0.0) */
2068         baij->ilen[row/bs] = 0;
2069       } /* end (diag == 0.0) */
2070     } else { /* (sizes[i] != bs) */
2071 #if defined(PETSC_USE_DEBUG)
2072       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2073 #endif
2074       for (k=0; k<count; k++) {
2075         aa[0] =  zero;
2076         aa   += bs;
2077       }
2078       if (diag != (PetscScalar)0.0) {
2079         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2080       }
2081     }
2082   }
2083 
2084   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2085   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2090 {
2091   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2092   PetscErrorCode    ierr;
2093   PetscInt          i,j,k,count;
2094   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2095   PetscScalar       zero = 0.0;
2096   MatScalar         *aa;
2097   const PetscScalar *xx;
2098   PetscScalar       *bb;
2099   PetscBool         *zeroed,vecs = PETSC_FALSE;
2100 
2101   PetscFunctionBegin;
2102   /* fix right hand side if needed */
2103   if (x && b) {
2104     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2105     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2106     vecs = PETSC_TRUE;
2107   }
2108 
2109   /* zero the columns */
2110   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2111   for (i=0; i<is_n; i++) {
2112     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2113     zeroed[is_idx[i]] = PETSC_TRUE;
2114   }
2115   for (i=0; i<A->rmap->N; i++) {
2116     if (!zeroed[i]) {
2117       row = i/bs;
2118       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2119         for (k=0; k<bs; k++) {
2120           col = bs*baij->j[j] + k;
2121           if (zeroed[col]) {
2122             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2123             if (vecs) bb[i] -= aa[0]*xx[col];
2124             aa[0] = 0.0;
2125           }
2126         }
2127       }
2128     } else if (vecs) bb[i] = diag*xx[i];
2129   }
2130   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2131   if (vecs) {
2132     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2133     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2134   }
2135 
2136   /* zero the rows */
2137   for (i=0; i<is_n; i++) {
2138     row   = is_idx[i];
2139     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2140     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2141     for (k=0; k<count; k++) {
2142       aa[0] =  zero;
2143       aa   += bs;
2144     }
2145     if (diag != (PetscScalar)0.0) {
2146       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2147     }
2148   }
2149   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2150   PetscFunctionReturn(0);
2151 }
2152 
2153 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2154 {
2155   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2156   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2157   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2158   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2159   PetscErrorCode ierr;
2160   PetscInt       ridx,cidx,bs2=a->bs2;
2161   PetscBool      roworiented=a->roworiented;
2162   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2163 
2164   PetscFunctionBegin;
2165   for (k=0; k<m; k++) { /* loop over added rows */
2166     row  = im[k];
2167     brow = row/bs;
2168     if (row < 0) continue;
2169 #if defined(PETSC_USE_DEBUG)
2170     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2171 #endif
2172     rp   = aj + ai[brow];
2173     if (!A->structure_only) ap = aa + bs2*ai[brow];
2174     rmax = imax[brow];
2175     nrow = ailen[brow];
2176     low  = 0;
2177     high = nrow;
2178     for (l=0; l<n; l++) { /* loop over added columns */
2179       if (in[l] < 0) continue;
2180 #if defined(PETSC_USE_DEBUG)
2181       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2182 #endif
2183       col  = in[l]; bcol = col/bs;
2184       ridx = row % bs; cidx = col % bs;
2185       if (!A->structure_only) {
2186         if (roworiented) {
2187           value = v[l + k*n];
2188         } else {
2189           value = v[k + l*m];
2190         }
2191       }
2192       if (col <= lastcol) low = 0; else high = nrow;
2193       lastcol = col;
2194       while (high-low > 7) {
2195         t = (low+high)/2;
2196         if (rp[t] > bcol) high = t;
2197         else              low  = t;
2198       }
2199       for (i=low; i<high; i++) {
2200         if (rp[i] > bcol) break;
2201         if (rp[i] == bcol) {
2202           bap = ap +  bs2*i + bs*cidx + ridx;
2203           if (!A->structure_only) {
2204             if (is == ADD_VALUES) *bap += value;
2205             else                  *bap  = value;
2206           }
2207           goto noinsert1;
2208         }
2209       }
2210       if (nonew == 1) goto noinsert1;
2211       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2212       if (A->structure_only) {
2213         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2214       } else {
2215         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2216       }
2217       N = nrow++ - 1; high++;
2218       /* shift up all the later entries in this row */
2219       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2220       rp[i] = bcol;
2221       if (!A->structure_only) {
2222         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2223         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2224         ap[bs2*i + bs*cidx + ridx] = value;
2225       }
2226       a->nz++;
2227       A->nonzerostate++;
2228 noinsert1:;
2229       low = i;
2230     }
2231     ailen[brow] = nrow;
2232   }
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2237 {
2238   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2239   Mat            outA;
2240   PetscErrorCode ierr;
2241   PetscBool      row_identity,col_identity;
2242 
2243   PetscFunctionBegin;
2244   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2245   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2246   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2247   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2248 
2249   outA            = inA;
2250   inA->factortype = MAT_FACTOR_LU;
2251   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2252   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2253 
2254   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2255 
2256   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2257   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2258   a->row = row;
2259   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2260   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2261   a->col = col;
2262 
2263   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2264   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2265   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2266   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2267 
2268   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2269   if (!a->solve_work) {
2270     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2271     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2272   }
2273   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2278 {
2279   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2280   PetscInt    i,nz,mbs;
2281 
2282   PetscFunctionBegin;
2283   nz  = baij->maxnz;
2284   mbs = baij->mbs;
2285   for (i=0; i<nz; i++) {
2286     baij->j[i] = indices[i];
2287   }
2288   baij->nz = nz;
2289   for (i=0; i<mbs; i++) {
2290     baij->ilen[i] = baij->imax[i];
2291   }
2292   PetscFunctionReturn(0);
2293 }
2294 
2295 /*@
2296     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2297        in the matrix.
2298 
2299   Input Parameters:
2300 +  mat - the SeqBAIJ matrix
2301 -  indices - the column indices
2302 
2303   Level: advanced
2304 
2305   Notes:
2306     This can be called if you have precomputed the nonzero structure of the
2307   matrix and want to provide it to the matrix object to improve the performance
2308   of the MatSetValues() operation.
2309 
2310     You MUST have set the correct numbers of nonzeros per row in the call to
2311   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2312 
2313     MUST be called before any calls to MatSetValues();
2314 
2315 @*/
2316 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2317 {
2318   PetscErrorCode ierr;
2319 
2320   PetscFunctionBegin;
2321   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2322   PetscValidPointer(indices,2);
2323   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2324   PetscFunctionReturn(0);
2325 }
2326 
2327 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2328 {
2329   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2330   PetscErrorCode ierr;
2331   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2332   PetscReal      atmp;
2333   PetscScalar    *x,zero = 0.0;
2334   MatScalar      *aa;
2335   PetscInt       ncols,brow,krow,kcol;
2336 
2337   PetscFunctionBegin;
2338   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2339   bs  = A->rmap->bs;
2340   aa  = a->a;
2341   ai  = a->i;
2342   aj  = a->j;
2343   mbs = a->mbs;
2344 
2345   ierr = VecSet(v,zero);CHKERRQ(ierr);
2346   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2347   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2348   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2349   for (i=0; i<mbs; i++) {
2350     ncols = ai[1] - ai[0]; ai++;
2351     brow  = bs*i;
2352     for (j=0; j<ncols; j++) {
2353       for (kcol=0; kcol<bs; kcol++) {
2354         for (krow=0; krow<bs; krow++) {
2355           atmp = PetscAbsScalar(*aa);aa++;
2356           row  = brow + krow;   /* row index */
2357           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2358         }
2359       }
2360       aj++;
2361     }
2362   }
2363   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2368 {
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   /* If the two matrices have the same copy implementation, use fast copy. */
2373   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2374     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2375     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2376     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2377 
2378     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2379     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2380     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2381     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2382   } else {
2383     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2384   }
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2389 {
2390   PetscErrorCode ierr;
2391 
2392   PetscFunctionBegin;
2393   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2398 {
2399   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2400 
2401   PetscFunctionBegin;
2402   *array = a->a;
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2407 {
2408   PetscFunctionBegin;
2409   *array = NULL;
2410   PetscFunctionReturn(0);
2411 }
2412 
2413 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2414 {
2415   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2416   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2417   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2418   PetscErrorCode ierr;
2419 
2420   PetscFunctionBegin;
2421   /* Set the number of nonzeros in the new matrix */
2422   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2423   PetscFunctionReturn(0);
2424 }
2425 
2426 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2427 {
2428   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2429   PetscErrorCode ierr;
2430   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2431   PetscBLASInt   one=1;
2432 
2433   PetscFunctionBegin;
2434   if (str == SAME_NONZERO_PATTERN) {
2435     PetscScalar  alpha = a;
2436     PetscBLASInt bnz;
2437     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2438     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2439     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2440   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2441     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2442   } else {
2443     Mat      B;
2444     PetscInt *nnz;
2445     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2446     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2447     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2448     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2449     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2450     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2451     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2452     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2453     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2454     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2455     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2456     ierr = PetscFree(nnz);CHKERRQ(ierr);
2457   }
2458   PetscFunctionReturn(0);
2459 }
2460 
2461 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2462 {
2463   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2464   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2465   MatScalar   *aa = a->a;
2466 
2467   PetscFunctionBegin;
2468   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2469   PetscFunctionReturn(0);
2470 }
2471 
2472 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2473 {
2474   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2475   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2476   MatScalar   *aa = a->a;
2477 
2478   PetscFunctionBegin;
2479   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 /*
2484     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2485 */
2486 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2487 {
2488   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2489   PetscErrorCode ierr;
2490   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2491   PetscInt       nz = a->i[m],row,*jj,mr,col;
2492 
2493   PetscFunctionBegin;
2494   *nn = n;
2495   if (!ia) PetscFunctionReturn(0);
2496   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2497   else {
2498     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2499     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2500     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2501     jj   = a->j;
2502     for (i=0; i<nz; i++) {
2503       collengths[jj[i]]++;
2504     }
2505     cia[0] = oshift;
2506     for (i=0; i<n; i++) {
2507       cia[i+1] = cia[i] + collengths[i];
2508     }
2509     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2510     jj   = a->j;
2511     for (row=0; row<m; row++) {
2512       mr = a->i[row+1] - a->i[row];
2513       for (i=0; i<mr; i++) {
2514         col = *jj++;
2515 
2516         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2517       }
2518     }
2519     ierr = PetscFree(collengths);CHKERRQ(ierr);
2520     *ia  = cia; *ja = cja;
2521   }
2522   PetscFunctionReturn(0);
2523 }
2524 
2525 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2526 {
2527   PetscErrorCode ierr;
2528 
2529   PetscFunctionBegin;
2530   if (!ia) PetscFunctionReturn(0);
2531   ierr = PetscFree(*ia);CHKERRQ(ierr);
2532   ierr = PetscFree(*ja);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 /*
2537  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2538  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2539  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2540  */
2541 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2542 {
2543   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2544   PetscErrorCode ierr;
2545   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2546   PetscInt       nz = a->i[m],row,*jj,mr,col;
2547   PetscInt       *cspidx;
2548 
2549   PetscFunctionBegin;
2550   *nn = n;
2551   if (!ia) PetscFunctionReturn(0);
2552 
2553   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2554   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2555   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2556   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2557   jj   = a->j;
2558   for (i=0; i<nz; i++) {
2559     collengths[jj[i]]++;
2560   }
2561   cia[0] = oshift;
2562   for (i=0; i<n; i++) {
2563     cia[i+1] = cia[i] + collengths[i];
2564   }
2565   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2566   jj   = a->j;
2567   for (row=0; row<m; row++) {
2568     mr = a->i[row+1] - a->i[row];
2569     for (i=0; i<mr; i++) {
2570       col = *jj++;
2571       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2572       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2573     }
2574   }
2575   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2576   *ia    = cia;
2577   *ja    = cja;
2578   *spidx = cspidx;
2579   PetscFunctionReturn(0);
2580 }
2581 
2582 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2583 {
2584   PetscErrorCode ierr;
2585 
2586   PetscFunctionBegin;
2587   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2588   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2589   PetscFunctionReturn(0);
2590 }
2591 
2592 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2593 {
2594   PetscErrorCode ierr;
2595   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2596 
2597   PetscFunctionBegin;
2598   if (!Y->preallocated || !aij->nz) {
2599     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2600   }
2601   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2602   PetscFunctionReturn(0);
2603 }
2604 
2605 /* -------------------------------------------------------------------*/
2606 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2607                                        MatGetRow_SeqBAIJ,
2608                                        MatRestoreRow_SeqBAIJ,
2609                                        MatMult_SeqBAIJ_N,
2610                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2611                                        MatMultTranspose_SeqBAIJ,
2612                                        MatMultTransposeAdd_SeqBAIJ,
2613                                        0,
2614                                        0,
2615                                        0,
2616                                /* 10*/ 0,
2617                                        MatLUFactor_SeqBAIJ,
2618                                        0,
2619                                        0,
2620                                        MatTranspose_SeqBAIJ,
2621                                /* 15*/ MatGetInfo_SeqBAIJ,
2622                                        MatEqual_SeqBAIJ,
2623                                        MatGetDiagonal_SeqBAIJ,
2624                                        MatDiagonalScale_SeqBAIJ,
2625                                        MatNorm_SeqBAIJ,
2626                                /* 20*/ 0,
2627                                        MatAssemblyEnd_SeqBAIJ,
2628                                        MatSetOption_SeqBAIJ,
2629                                        MatZeroEntries_SeqBAIJ,
2630                                /* 24*/ MatZeroRows_SeqBAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                /* 29*/ MatSetUp_SeqBAIJ,
2636                                        0,
2637                                        0,
2638                                        0,
2639                                        0,
2640                                /* 34*/ MatDuplicate_SeqBAIJ,
2641                                        0,
2642                                        0,
2643                                        MatILUFactor_SeqBAIJ,
2644                                        0,
2645                                /* 39*/ MatAXPY_SeqBAIJ,
2646                                        MatCreateSubMatrices_SeqBAIJ,
2647                                        MatIncreaseOverlap_SeqBAIJ,
2648                                        MatGetValues_SeqBAIJ,
2649                                        MatCopy_SeqBAIJ,
2650                                /* 44*/ 0,
2651                                        MatScale_SeqBAIJ,
2652                                        MatShift_SeqBAIJ,
2653                                        0,
2654                                        MatZeroRowsColumns_SeqBAIJ,
2655                                /* 49*/ 0,
2656                                        MatGetRowIJ_SeqBAIJ,
2657                                        MatRestoreRowIJ_SeqBAIJ,
2658                                        MatGetColumnIJ_SeqBAIJ,
2659                                        MatRestoreColumnIJ_SeqBAIJ,
2660                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2661                                        0,
2662                                        0,
2663                                        0,
2664                                        MatSetValuesBlocked_SeqBAIJ,
2665                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2666                                        MatDestroy_SeqBAIJ,
2667                                        MatView_SeqBAIJ,
2668                                        0,
2669                                        0,
2670                                /* 64*/ 0,
2671                                        0,
2672                                        0,
2673                                        0,
2674                                        0,
2675                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2676                                        0,
2677                                        MatConvert_Basic,
2678                                        0,
2679                                        0,
2680                                /* 74*/ 0,
2681                                        MatFDColoringApply_BAIJ,
2682                                        0,
2683                                        0,
2684                                        0,
2685                                /* 79*/ 0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        MatLoad_SeqBAIJ,
2690                                /* 84*/ 0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                /* 89*/ 0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                /* 94*/ 0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                /* 99*/ 0,
2706                                        0,
2707                                        0,
2708                                        0,
2709                                        0,
2710                                /*104*/ 0,
2711                                        MatRealPart_SeqBAIJ,
2712                                        MatImaginaryPart_SeqBAIJ,
2713                                        0,
2714                                        0,
2715                                /*109*/ 0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        MatMissingDiagonal_SeqBAIJ,
2720                                /*114*/ 0,
2721                                        0,
2722                                        0,
2723                                        0,
2724                                        0,
2725                                /*119*/ 0,
2726                                        0,
2727                                        MatMultHermitianTranspose_SeqBAIJ,
2728                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2729                                        0,
2730                                /*124*/ 0,
2731                                        0,
2732                                        MatInvertBlockDiagonal_SeqBAIJ,
2733                                        0,
2734                                        0,
2735                                /*129*/ 0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                /*134*/ 0,
2741                                        0,
2742                                        0,
2743                                        0,
2744                                        0,
2745                                /*139*/ MatSetBlockSizes_Default,
2746                                        0,
2747                                        0,
2748                                        MatFDColoringSetUp_SeqXAIJ,
2749                                        0,
2750                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2751                                        MatDestroySubMatrices_SeqBAIJ
2752 };
2753 
2754 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2755 {
2756   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2757   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2758   PetscErrorCode ierr;
2759 
2760   PetscFunctionBegin;
2761   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2762 
2763   /* allocate space for values if not already there */
2764   if (!aij->saved_values) {
2765     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2766     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2767   }
2768 
2769   /* copy values over */
2770   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2771   PetscFunctionReturn(0);
2772 }
2773 
2774 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2775 {
2776   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2777   PetscErrorCode ierr;
2778   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2779 
2780   PetscFunctionBegin;
2781   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2782   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2783 
2784   /* copy values over */
2785   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2786   PetscFunctionReturn(0);
2787 }
2788 
2789 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2790 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2791 
2792 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2793 {
2794   Mat_SeqBAIJ    *b;
2795   PetscErrorCode ierr;
2796   PetscInt       i,mbs,nbs,bs2;
2797   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2798 
2799   PetscFunctionBegin;
2800   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2801   if (nz == MAT_SKIP_ALLOCATION) {
2802     skipallocation = PETSC_TRUE;
2803     nz             = 0;
2804   }
2805 
2806   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2807   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2808   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2809   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2810 
2811   B->preallocated = PETSC_TRUE;
2812 
2813   mbs = B->rmap->n/bs;
2814   nbs = B->cmap->n/bs;
2815   bs2 = bs*bs;
2816 
2817   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2818 
2819   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2820   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2821   if (nnz) {
2822     for (i=0; i<mbs; i++) {
2823       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2824       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2825     }
2826   }
2827 
2828   b    = (Mat_SeqBAIJ*)B->data;
2829   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2830   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2831   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2832 
2833   if (!flg) {
2834     switch (bs) {
2835     case 1:
2836       B->ops->mult    = MatMult_SeqBAIJ_1;
2837       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2838       break;
2839     case 2:
2840       B->ops->mult    = MatMult_SeqBAIJ_2;
2841       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2842       break;
2843     case 3:
2844       B->ops->mult    = MatMult_SeqBAIJ_3;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2846       break;
2847     case 4:
2848       B->ops->mult    = MatMult_SeqBAIJ_4;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2850       break;
2851     case 5:
2852       B->ops->mult    = MatMult_SeqBAIJ_5;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2854       break;
2855     case 6:
2856       B->ops->mult    = MatMult_SeqBAIJ_6;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2858       break;
2859     case 7:
2860       B->ops->mult    = MatMult_SeqBAIJ_7;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2862       break;
2863     case 9:
2864 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2865       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2867 #else
2868       B->ops->mult    = MatMult_SeqBAIJ_N;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2870 #endif
2871       break;
2872     case 11:
2873       B->ops->mult    = MatMult_SeqBAIJ_11;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2875       break;
2876     case 15:
2877       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2878       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2879       break;
2880     default:
2881       B->ops->mult    = MatMult_SeqBAIJ_N;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2883       break;
2884     }
2885   }
2886   B->ops->sor = MatSOR_SeqBAIJ;
2887   b->mbs = mbs;
2888   b->nbs = nbs;
2889   if (!skipallocation) {
2890     if (!b->imax) {
2891       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2892       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2893 
2894       b->free_imax_ilen = PETSC_TRUE;
2895     }
2896     /* b->ilen will count nonzeros in each block row so far. */
2897     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2898     if (!nnz) {
2899       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2900       else if (nz < 0) nz = 1;
2901       nz = PetscMin(nz,nbs);
2902       for (i=0; i<mbs; i++) b->imax[i] = nz;
2903       nz = nz*mbs;
2904     } else {
2905       PetscInt64 nz64 = 0;
2906       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2907       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2908     }
2909 
2910     /* allocate the matrix space */
2911     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2912     if (B->structure_only) {
2913       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2914       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2915       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2916     } else {
2917       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2918       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2919       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2920     }
2921     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2922 
2923     if (B->structure_only) {
2924       b->singlemalloc = PETSC_FALSE;
2925       b->free_a       = PETSC_FALSE;
2926     } else {
2927       b->singlemalloc = PETSC_TRUE;
2928       b->free_a       = PETSC_TRUE;
2929     }
2930     b->free_ij = PETSC_TRUE;
2931 
2932     b->i[0] = 0;
2933     for (i=1; i<mbs+1; i++) {
2934       b->i[i] = b->i[i-1] + b->imax[i-1];
2935     }
2936 
2937   } else {
2938     b->free_a  = PETSC_FALSE;
2939     b->free_ij = PETSC_FALSE;
2940   }
2941 
2942   b->bs2              = bs2;
2943   b->mbs              = mbs;
2944   b->nz               = 0;
2945   b->maxnz            = nz;
2946   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2947   B->was_assembled    = PETSC_FALSE;
2948   B->assembled        = PETSC_FALSE;
2949   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2950   PetscFunctionReturn(0);
2951 }
2952 
2953 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2954 {
2955   PetscInt       i,m,nz,nz_max=0,*nnz;
2956   PetscScalar    *values=0;
2957   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2958   PetscErrorCode ierr;
2959 
2960   PetscFunctionBegin;
2961   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2962   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2963   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2964   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2965   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2966   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2967   m    = B->rmap->n/bs;
2968 
2969   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2970   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2971   for (i=0; i<m; i++) {
2972     nz = ii[i+1]- ii[i];
2973     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2974     nz_max = PetscMax(nz_max, nz);
2975     nnz[i] = nz;
2976   }
2977   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2978   ierr = PetscFree(nnz);CHKERRQ(ierr);
2979 
2980   values = (PetscScalar*)V;
2981   if (!values) {
2982     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2983   }
2984   for (i=0; i<m; i++) {
2985     PetscInt          ncols  = ii[i+1] - ii[i];
2986     const PetscInt    *icols = jj + ii[i];
2987     if (bs == 1 || !roworiented) {
2988       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2989       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2990     } else {
2991       PetscInt j;
2992       for (j=0; j<ncols; j++) {
2993         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2994         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2995       }
2996     }
2997   }
2998   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2999   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3000   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3001   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3002   PetscFunctionReturn(0);
3003 }
3004 
3005 /*@C
3006    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3007 
3008    Not Collective
3009 
3010    Input Parameter:
3011 .  mat - a MATSEQBAIJ matrix
3012 
3013    Output Parameter:
3014 .   array - pointer to the data
3015 
3016    Level: intermediate
3017 
3018 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3019 @*/
3020 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3021 {
3022   PetscErrorCode ierr;
3023 
3024   PetscFunctionBegin;
3025   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3026   PetscFunctionReturn(0);
3027 }
3028 
3029 /*@C
3030    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3031 
3032    Not Collective
3033 
3034    Input Parameters:
3035 +  mat - a MATSEQBAIJ matrix
3036 -  array - pointer to the data
3037 
3038    Level: intermediate
3039 
3040 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3041 @*/
3042 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3043 {
3044   PetscErrorCode ierr;
3045 
3046   PetscFunctionBegin;
3047   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3048   PetscFunctionReturn(0);
3049 }
3050 
3051 /*MC
3052    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3053    block sparse compressed row format.
3054 
3055    Options Database Keys:
3056 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3057 
3058    Level: beginner
3059 
3060    Notes:
3061     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3062     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3063 
3064 .seealso: MatCreateSeqBAIJ()
3065 M*/
3066 
3067 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3068 
3069 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3070 {
3071   PetscErrorCode ierr;
3072   PetscMPIInt    size;
3073   Mat_SeqBAIJ    *b;
3074 
3075   PetscFunctionBegin;
3076   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3077   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3078 
3079   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3080   B->data = (void*)b;
3081   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3082 
3083   b->row          = 0;
3084   b->col          = 0;
3085   b->icol         = 0;
3086   b->reallocs     = 0;
3087   b->saved_values = 0;
3088 
3089   b->roworiented        = PETSC_TRUE;
3090   b->nonew              = 0;
3091   b->diag               = 0;
3092   B->spptr              = 0;
3093   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3094   b->keepnonzeropattern = PETSC_FALSE;
3095 
3096   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3097   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3098   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3099   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3100   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3101   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3102   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3103   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3104   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3105   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3106   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3107 #if defined(PETSC_HAVE_HYPRE)
3108   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3109 #endif
3110   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3111   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3112   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3113   PetscFunctionReturn(0);
3114 }
3115 
3116 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3117 {
3118   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3119   PetscErrorCode ierr;
3120   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3121 
3122   PetscFunctionBegin;
3123   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3124 
3125   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3126     c->imax           = a->imax;
3127     c->ilen           = a->ilen;
3128     c->free_imax_ilen = PETSC_FALSE;
3129   } else {
3130     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3131     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3132     for (i=0; i<mbs; i++) {
3133       c->imax[i] = a->imax[i];
3134       c->ilen[i] = a->ilen[i];
3135     }
3136     c->free_imax_ilen = PETSC_TRUE;
3137   }
3138 
3139   /* allocate the matrix space */
3140   if (mallocmatspace) {
3141     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3142       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3143       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3144 
3145       c->i            = a->i;
3146       c->j            = a->j;
3147       c->singlemalloc = PETSC_FALSE;
3148       c->free_a       = PETSC_TRUE;
3149       c->free_ij      = PETSC_FALSE;
3150       c->parent       = A;
3151       C->preallocated = PETSC_TRUE;
3152       C->assembled    = PETSC_TRUE;
3153 
3154       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3155       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3156       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3157     } else {
3158       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3159       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3160 
3161       c->singlemalloc = PETSC_TRUE;
3162       c->free_a       = PETSC_TRUE;
3163       c->free_ij      = PETSC_TRUE;
3164 
3165       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3166       if (mbs > 0) {
3167         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3168         if (cpvalues == MAT_COPY_VALUES) {
3169           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3170         } else {
3171           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3172         }
3173       }
3174       C->preallocated = PETSC_TRUE;
3175       C->assembled    = PETSC_TRUE;
3176     }
3177   }
3178 
3179   c->roworiented = a->roworiented;
3180   c->nonew       = a->nonew;
3181 
3182   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3183   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3184 
3185   c->bs2         = a->bs2;
3186   c->mbs         = a->mbs;
3187   c->nbs         = a->nbs;
3188 
3189   if (a->diag) {
3190     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3191       c->diag      = a->diag;
3192       c->free_diag = PETSC_FALSE;
3193     } else {
3194       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3195       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3196       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3197       c->free_diag = PETSC_TRUE;
3198     }
3199   } else c->diag = 0;
3200 
3201   c->nz         = a->nz;
3202   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3203   c->solve_work = NULL;
3204   c->mult_work  = NULL;
3205   c->sor_workt  = NULL;
3206   c->sor_work   = NULL;
3207 
3208   c->compressedrow.use   = a->compressedrow.use;
3209   c->compressedrow.nrows = a->compressedrow.nrows;
3210   if (a->compressedrow.use) {
3211     i    = a->compressedrow.nrows;
3212     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3213     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3214     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3215     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3216   } else {
3217     c->compressedrow.use    = PETSC_FALSE;
3218     c->compressedrow.i      = NULL;
3219     c->compressedrow.rindex = NULL;
3220   }
3221   C->nonzerostate = A->nonzerostate;
3222 
3223   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3224   PetscFunctionReturn(0);
3225 }
3226 
3227 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3228 {
3229   PetscErrorCode ierr;
3230 
3231   PetscFunctionBegin;
3232   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3233   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3234   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3235   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3236   PetscFunctionReturn(0);
3237 }
3238 
3239 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3240 {
3241   Mat_SeqBAIJ    *a;
3242   PetscErrorCode ierr;
3243   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3244   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3245   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3246   PetscInt       *masked,nmask,tmp,bs2,ishift;
3247   PetscMPIInt    size;
3248   int            fd;
3249   PetscScalar    *aa;
3250   MPI_Comm       comm;
3251   PetscBool      isbinary;
3252 
3253   PetscFunctionBegin;
3254   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3255   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3256 
3257   /* force binary viewer to load .info file if it has not yet done so */
3258   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3259   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3260   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3261   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3262   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3263   if (bs < 0) bs = 1;
3264   bs2  = bs*bs;
3265 
3266   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3267   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3268   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3269   ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3270   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3271   M = header[1]; N = header[2]; nz = header[3];
3272 
3273   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3274   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3275 
3276   /*
3277      This code adds extra rows to make sure the number of rows is
3278     divisible by the blocksize
3279   */
3280   mbs        = M/bs;
3281   extra_rows = bs - M + bs*(mbs);
3282   if (extra_rows == bs) extra_rows = 0;
3283   else mbs++;
3284   if (extra_rows) {
3285     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3286   }
3287 
3288   /* Set global sizes if not already set */
3289   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3290     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3291   } else { /* Check if the matrix global sizes are correct */
3292     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3293     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3294       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3295     }
3296     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3297   }
3298 
3299   /* read in row lengths */
3300   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3301   ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
3302   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3303 
3304   /* read in column indices */
3305   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3306   ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3307   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3308 
3309   /* loop over row lengths determining block row lengths */
3310   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3311   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3312   ierr     = PetscArrayzero(mask,mbs);CHKERRQ(ierr);
3313   rowcount = 0;
3314   nzcount  = 0;
3315   for (i=0; i<mbs; i++) {
3316     nmask = 0;
3317     for (j=0; j<bs; j++) {
3318       kmax = rowlengths[rowcount];
3319       for (k=0; k<kmax; k++) {
3320         tmp = jj[nzcount++]/bs;
3321         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3322       }
3323       rowcount++;
3324     }
3325     browlengths[i] += nmask;
3326     /* zero out the mask elements we set */
3327     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3328   }
3329 
3330   /* Do preallocation  */
3331   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3332   a    = (Mat_SeqBAIJ*)newmat->data;
3333 
3334   /* set matrix "i" values */
3335   a->i[0] = 0;
3336   for (i=1; i<= mbs; i++) {
3337     a->i[i]      = a->i[i-1] + browlengths[i-1];
3338     a->ilen[i-1] = browlengths[i-1];
3339   }
3340   a->nz = 0;
3341   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3342 
3343   /* read in nonzero values */
3344   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3345   ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3346   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3347 
3348   /* set "a" and "j" values into matrix */
3349   nzcount = 0; jcount = 0;
3350   for (i=0; i<mbs; i++) {
3351     nzcountb = nzcount;
3352     nmask    = 0;
3353     for (j=0; j<bs; j++) {
3354       kmax = rowlengths[i*bs+j];
3355       for (k=0; k<kmax; k++) {
3356         tmp = jj[nzcount++]/bs;
3357         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3358       }
3359     }
3360     /* sort the masked values */
3361     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3362 
3363     /* set "j" values into matrix */
3364     maskcount = 1;
3365     for (j=0; j<nmask; j++) {
3366       a->j[jcount++]  = masked[j];
3367       mask[masked[j]] = maskcount++;
3368     }
3369     /* set "a" values into matrix */
3370     ishift = bs2*a->i[i];
3371     for (j=0; j<bs; j++) {
3372       kmax = rowlengths[i*bs+j];
3373       for (k=0; k<kmax; k++) {
3374         tmp       = jj[nzcountb]/bs;
3375         block     = mask[tmp] - 1;
3376         point     = jj[nzcountb] - bs*tmp;
3377         idx       = ishift + bs2*block + j + bs*point;
3378         a->a[idx] = (MatScalar)aa[nzcountb++];
3379       }
3380     }
3381     /* zero out the mask elements we set */
3382     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3383   }
3384   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3385 
3386   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3387   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3388   ierr = PetscFree(aa);CHKERRQ(ierr);
3389   ierr = PetscFree(jj);CHKERRQ(ierr);
3390   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3391 
3392   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3393   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3394   PetscFunctionReturn(0);
3395 }
3396 
3397 /*@C
3398    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3399    compressed row) format.  For good matrix assembly performance the
3400    user should preallocate the matrix storage by setting the parameter nz
3401    (or the array nnz).  By setting these parameters accurately, performance
3402    during matrix assembly can be increased by more than a factor of 50.
3403 
3404    Collective
3405 
3406    Input Parameters:
3407 +  comm - MPI communicator, set to PETSC_COMM_SELF
3408 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3409           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3410 .  m - number of rows
3411 .  n - number of columns
3412 .  nz - number of nonzero blocks  per block row (same for all rows)
3413 -  nnz - array containing the number of nonzero blocks in the various block rows
3414          (possibly different for each block row) or NULL
3415 
3416    Output Parameter:
3417 .  A - the matrix
3418 
3419    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3420    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3421    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3422 
3423    Options Database Keys:
3424 +   -mat_no_unroll - uses code that does not unroll the loops in the
3425                      block calculations (much slower)
3426 -    -mat_block_size - size of the blocks to use
3427 
3428    Level: intermediate
3429 
3430    Notes:
3431    The number of rows and columns must be divisible by blocksize.
3432 
3433    If the nnz parameter is given then the nz parameter is ignored
3434 
3435    A nonzero block is any block that as 1 or more nonzeros in it
3436 
3437    The block AIJ format is fully compatible with standard Fortran 77
3438    storage.  That is, the stored row and column indices can begin at
3439    either one (as in Fortran) or zero.  See the users' manual for details.
3440 
3441    Specify the preallocated storage with either nz or nnz (not both).
3442    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3443    allocation.  See Users-Manual: ch_mat for details.
3444    matrices.
3445 
3446 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3447 @*/
3448 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3449 {
3450   PetscErrorCode ierr;
3451 
3452   PetscFunctionBegin;
3453   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3454   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3455   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3456   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3457   PetscFunctionReturn(0);
3458 }
3459 
3460 /*@C
3461    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3462    per row in the matrix. For good matrix assembly performance the
3463    user should preallocate the matrix storage by setting the parameter nz
3464    (or the array nnz).  By setting these parameters accurately, performance
3465    during matrix assembly can be increased by more than a factor of 50.
3466 
3467    Collective
3468 
3469    Input Parameters:
3470 +  B - the matrix
3471 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3472           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3473 .  nz - number of block nonzeros per block row (same for all rows)
3474 -  nnz - array containing the number of block nonzeros in the various block rows
3475          (possibly different for each block row) or NULL
3476 
3477    Options Database Keys:
3478 +   -mat_no_unroll - uses code that does not unroll the loops in the
3479                      block calculations (much slower)
3480 -   -mat_block_size - size of the blocks to use
3481 
3482    Level: intermediate
3483 
3484    Notes:
3485    If the nnz parameter is given then the nz parameter is ignored
3486 
3487    You can call MatGetInfo() to get information on how effective the preallocation was;
3488    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3489    You can also run with the option -info and look for messages with the string
3490    malloc in them to see if additional memory allocation was needed.
3491 
3492    The block AIJ format is fully compatible with standard Fortran 77
3493    storage.  That is, the stored row and column indices can begin at
3494    either one (as in Fortran) or zero.  See the users' manual for details.
3495 
3496    Specify the preallocated storage with either nz or nnz (not both).
3497    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3498    allocation.  See Users-Manual: ch_mat for details.
3499 
3500 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3501 @*/
3502 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3503 {
3504   PetscErrorCode ierr;
3505 
3506   PetscFunctionBegin;
3507   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3508   PetscValidType(B,1);
3509   PetscValidLogicalCollectiveInt(B,bs,2);
3510   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3511   PetscFunctionReturn(0);
3512 }
3513 
3514 /*@C
3515    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3516 
3517    Collective
3518 
3519    Input Parameters:
3520 +  B - the matrix
3521 .  i - the indices into j for the start of each local row (starts with zero)
3522 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3523 -  v - optional values in the matrix
3524 
3525    Level: advanced
3526 
3527    Notes:
3528    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3529    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3530    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3531    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3532    block column and the second index is over columns within a block.
3533 
3534    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3535 
3536 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3537 @*/
3538 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3539 {
3540   PetscErrorCode ierr;
3541 
3542   PetscFunctionBegin;
3543   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3544   PetscValidType(B,1);
3545   PetscValidLogicalCollectiveInt(B,bs,2);
3546   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3547   PetscFunctionReturn(0);
3548 }
3549 
3550 
3551 /*@
3552      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3553 
3554      Collective
3555 
3556    Input Parameters:
3557 +  comm - must be an MPI communicator of size 1
3558 .  bs - size of block
3559 .  m - number of rows
3560 .  n - number of columns
3561 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3562 .  j - column indices
3563 -  a - matrix values
3564 
3565    Output Parameter:
3566 .  mat - the matrix
3567 
3568    Level: advanced
3569 
3570    Notes:
3571        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3572     once the matrix is destroyed
3573 
3574        You cannot set new nonzero locations into this matrix, that will generate an error.
3575 
3576        The i and j indices are 0 based
3577 
3578        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3579 
3580       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3581       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3582       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3583       with column-major ordering within blocks.
3584 
3585 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3586 
3587 @*/
3588 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3589 {
3590   PetscErrorCode ierr;
3591   PetscInt       ii;
3592   Mat_SeqBAIJ    *baij;
3593 
3594   PetscFunctionBegin;
3595   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3596   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3597 
3598   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3599   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3600   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3601   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3602   baij = (Mat_SeqBAIJ*)(*mat)->data;
3603   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3604   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3605 
3606   baij->i = i;
3607   baij->j = j;
3608   baij->a = a;
3609 
3610   baij->singlemalloc = PETSC_FALSE;
3611   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3612   baij->free_a       = PETSC_FALSE;
3613   baij->free_ij      = PETSC_FALSE;
3614 
3615   for (ii=0; ii<m; ii++) {
3616     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3617 #if defined(PETSC_USE_DEBUG)
3618     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3619 #endif
3620   }
3621 #if defined(PETSC_USE_DEBUG)
3622   for (ii=0; ii<baij->i[m]; ii++) {
3623     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3624     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3625   }
3626 #endif
3627 
3628   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3629   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3630   PetscFunctionReturn(0);
3631 }
3632 
3633 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3634 {
3635   PetscErrorCode ierr;
3636   PetscMPIInt    size;
3637 
3638   PetscFunctionBegin;
3639   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3640   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3641     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3642   } else {
3643     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3644   }
3645   PetscFunctionReturn(0);
3646 }
3647