xref: /petsc/src/mat/impls/baij/seq/baij.c (revision f5ff9c666c0d37e8a7ec3aa2f8e2aa9e44449bdb)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21 {
22   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
23   PetscErrorCode ierr;
24   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
25   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
26   PetscReal      shift = 0.0;
27   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
28 
29   PetscFunctionBegin;
30   allowzeropivot = PetscNot(A->erroriffailure);
31 
32   if (a->idiagvalid) {
33     if (values) *values = a->idiag;
34     PetscFunctionReturn(0);
35   }
36   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
37   diag_offset = a->diag;
38   if (!a->idiag) {
39     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
40     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
41   }
42   diag  = a->idiag;
43   if (values) *values = a->idiag;
44   /* factor and invert each block */
45   switch (bs) {
46   case 1:
47     for (i=0; i<mbs; i++) {
48       odiag    = v + 1*diag_offset[i];
49       diag[0]  = odiag[0];
50 
51       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52         if (allowzeropivot) {
53           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
54           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
55           A->factorerror_zeropivot_row   = i;
56           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
57         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58       }
59 
60       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
61       diag    += 1;
62     }
63     break;
64   case 2:
65     for (i=0; i<mbs; i++) {
66       odiag    = v + 4*diag_offset[i];
67       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
69       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70       diag    += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
80       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81       diag    += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
88       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
89       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90       diag  += 16;
91     }
92     break;
93   case 5:
94     for (i=0; i<mbs; i++) {
95       odiag  = v + 25*diag_offset[i];
96       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99       diag  += 25;
100     }
101     break;
102   case 6:
103     for (i=0; i<mbs; i++) {
104       odiag  = v + 36*diag_offset[i];
105       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
107       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108       diag  += 36;
109     }
110     break;
111   case 7:
112     for (i=0; i<mbs; i++) {
113       odiag  = v + 49*diag_offset[i];
114       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 49;
118     }
119     break;
120   default:
121     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122     for (i=0; i<mbs; i++) {
123       odiag  = v + bs2*diag_offset[i];
124       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
125       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127       diag  += bs2;
128     }
129     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136 {
137   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
138   PetscScalar       *x,*work,*w,*workt,*t;
139   const MatScalar   *v,*aa = a->a, *idiag;
140   const PetscScalar *b,*xb;
141   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142   PetscErrorCode    ierr;
143   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
145 
146   PetscFunctionBegin;
147   its = its*lits;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
151   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
152   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
153 
154   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155 
156   if (!m) PetscFunctionReturn(0);
157   diag  = a->diag;
158   idiag = a->idiag;
159   k    = PetscMax(A->rmap->n,A->cmap->n);
160   if (!a->mult_work) {
161     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162   }
163   if (!a->sor_workt) {
164     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165   }
166   if (!a->sor_work) {
167     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168   }
169   work = a->mult_work;
170   t    = a->sor_workt;
171   w    = a->sor_work;
172 
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178       switch (bs) {
179       case 1:
180         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181         t[0] = b[0];
182         i2     = 1;
183         idiag += 1;
184         for (i=1; i<m; i++) {
185           v  = aa + ai[i];
186           vi = aj + ai[i];
187           nz = diag[i] - ai[i];
188           s[0] = b[i2];
189           for (j=0; j<nz; j++) {
190             xw[0] = x[vi[j]];
191             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192           }
193           t[i2] = s[0];
194           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195           x[i2]  = xw[0];
196           idiag += 1;
197           i2    += 1;
198         }
199         break;
200       case 2:
201         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202         t[0] = b[0]; t[1] = b[1];
203         i2     = 2;
204         idiag += 4;
205         for (i=1; i<m; i++) {
206           v  = aa + 4*ai[i];
207           vi = aj + ai[i];
208           nz = diag[i] - ai[i];
209           s[0] = b[i2]; s[1] = b[i2+1];
210           for (j=0; j<nz; j++) {
211             idx = 2*vi[j];
212             it  = 4*j;
213             xw[0] = x[idx]; xw[1] = x[1+idx];
214             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215           }
216           t[i2] = s[0]; t[i2+1] = s[1];
217           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218           x[i2]   = xw[0]; x[i2+1] = xw[1];
219           idiag  += 4;
220           i2     += 2;
221         }
222         break;
223       case 3:
224         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226         i2     = 3;
227         idiag += 9;
228         for (i=1; i<m; i++) {
229           v  = aa + 9*ai[i];
230           vi = aj + ai[i];
231           nz = diag[i] - ai[i];
232           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233           while (nz--) {
234             idx = 3*(*vi++);
235             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237             v  += 9;
238           }
239           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242           idiag  += 9;
243           i2     += 3;
244         }
245         break;
246       case 4:
247         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249         i2     = 4;
250         idiag += 16;
251         for (i=1; i<m; i++) {
252           v  = aa + 16*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256           while (nz--) {
257             idx = 4*(*vi++);
258             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260             v  += 16;
261           }
262           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265           idiag  += 16;
266           i2     += 4;
267         }
268         break;
269       case 5:
270         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272         i2     = 5;
273         idiag += 25;
274         for (i=1; i<m; i++) {
275           v  = aa + 25*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279           while (nz--) {
280             idx = 5*(*vi++);
281             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283             v  += 25;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288           idiag  += 25;
289           i2     += 5;
290         }
291         break;
292       case 6:
293         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295         i2     = 6;
296         idiag += 36;
297         for (i=1; i<m; i++) {
298           v  = aa + 36*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302           while (nz--) {
303             idx = 6*(*vi++);
304             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307             v  += 36;
308           }
309           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313           idiag  += 36;
314           i2     += 6;
315         }
316         break;
317       case 7:
318         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321         i2     = 7;
322         idiag += 49;
323         for (i=1; i<m; i++) {
324           v  = aa + 49*ai[i];
325           vi = aj + ai[i];
326           nz = diag[i] - ai[i];
327           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329           while (nz--) {
330             idx = 7*(*vi++);
331             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334             v  += 49;
335           }
336           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341           idiag  += 49;
342           i2     += 7;
343         }
344         break;
345       default:
346         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
348         i2     = bs;
349         idiag += bs2;
350         for (i=1; i<m; i++) {
351           v  = aa + bs2*ai[i];
352           vi = aj + ai[i];
353           nz = diag[i] - ai[i];
354 
355           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
356           /* copy all rows of x that are needed into contiguous space */
357           workt = work;
358           for (j=0; j<nz; j++) {
359             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
360             workt += bs;
361           }
362           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
364           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365 
366           idiag += bs2;
367           i2    += bs;
368         }
369         break;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373       xb = t;
374     }
375     else xb = b;
376     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377       idiag = a->idiag+bs2*(a->mbs-1);
378       i2 = bs * (m-1);
379       switch (bs) {
380       case 1:
381         s[0]  = xb[i2];
382         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383         x[i2] = xw[0];
384         i2   -= 1;
385         for (i=m-2; i>=0; i--) {
386           v  = aa + (diag[i]+1);
387           vi = aj + diag[i] + 1;
388           nz = ai[i+1] - diag[i] - 1;
389           s[0] = xb[i2];
390           for (j=0; j<nz; j++) {
391             xw[0] = x[vi[j]];
392             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393           }
394           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395           x[i2]  = xw[0];
396           idiag -= 1;
397           i2    -= 1;
398         }
399         break;
400       case 2:
401         s[0]  = xb[i2]; s[1] = xb[i2+1];
402         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403         x[i2] = xw[0]; x[i2+1] = xw[1];
404         i2    -= 2;
405         idiag -= 4;
406         for (i=m-2; i>=0; i--) {
407           v  = aa + 4*(diag[i] + 1);
408           vi = aj + diag[i] + 1;
409           nz = ai[i+1] - diag[i] - 1;
410           s[0] = xb[i2]; s[1] = xb[i2+1];
411           for (j=0; j<nz; j++) {
412             idx = 2*vi[j];
413             it  = 4*j;
414             xw[0] = x[idx]; xw[1] = x[1+idx];
415             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416           }
417           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418           x[i2]   = xw[0]; x[i2+1] = xw[1];
419           idiag  -= 4;
420           i2     -= 2;
421         }
422         break;
423       case 3:
424         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427         i2    -= 3;
428         idiag -= 9;
429         for (i=m-2; i>=0; i--) {
430           v  = aa + 9*(diag[i]+1);
431           vi = aj + diag[i] + 1;
432           nz = ai[i+1] - diag[i] - 1;
433           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434           while (nz--) {
435             idx = 3*(*vi++);
436             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438             v  += 9;
439           }
440           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442           idiag  -= 9;
443           i2     -= 3;
444         }
445         break;
446       case 4:
447         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450         i2    -= 4;
451         idiag -= 16;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 16*(diag[i]+1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457           while (nz--) {
458             idx = 4*(*vi++);
459             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461             v  += 16;
462           }
463           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465           idiag  -= 16;
466           i2     -= 4;
467         }
468         break;
469       case 5:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473         i2    -= 5;
474         idiag -= 25;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 25*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480           while (nz--) {
481             idx = 5*(*vi++);
482             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484             v  += 25;
485           }
486           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488           idiag  -= 25;
489           i2     -= 5;
490         }
491         break;
492       case 6:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496         i2    -= 6;
497         idiag -= 36;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 36*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503           while (nz--) {
504             idx = 6*(*vi++);
505             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508             v  += 36;
509           }
510           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512           idiag  -= 36;
513           i2     -= 6;
514         }
515         break;
516       case 7:
517         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522         i2    -= 7;
523         idiag -= 49;
524         for (i=m-2; i>=0; i--) {
525           v  = aa + 49*(diag[i]+1);
526           vi = aj + diag[i] + 1;
527           nz = ai[i+1] - diag[i] - 1;
528           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530           while (nz--) {
531             idx = 7*(*vi++);
532             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535             v  += 49;
536           }
537           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540           idiag  -= 49;
541           i2     -= 7;
542         }
543         break;
544       default:
545         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
546         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547         i2    -= bs;
548         idiag -= bs2;
549         for (i=m-2; i>=0; i--) {
550           v  = aa + bs2*(diag[i]+1);
551           vi = aj + diag[i] + 1;
552           nz = ai[i+1] - diag[i] - 1;
553 
554           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
555           /* copy all rows of x that are needed into contiguous space */
556           workt = work;
557           for (j=0; j<nz; j++) {
558             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
559             workt += bs;
560           }
561           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563 
564           idiag -= bs2;
565           i2    -= bs;
566         }
567         break;
568       }
569       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570     }
571     its--;
572   }
573   while (its--) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       idiag = a->idiag;
576       i2 = 0;
577       switch (bs) {
578       case 1:
579         for (i=0; i<m; i++) {
580           v  = aa + ai[i];
581           vi = aj + ai[i];
582           nz = ai[i+1] - ai[i];
583           s[0] = b[i2];
584           for (j=0; j<nz; j++) {
585             xw[0] = x[vi[j]];
586             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587           }
588           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589           x[i2] += xw[0];
590           idiag += 1;
591           i2    += 1;
592         }
593         break;
594       case 2:
595         for (i=0; i<m; i++) {
596           v  = aa + 4*ai[i];
597           vi = aj + ai[i];
598           nz = ai[i+1] - ai[i];
599           s[0] = b[i2]; s[1] = b[i2+1];
600           for (j=0; j<nz; j++) {
601             idx = 2*vi[j];
602             it  = 4*j;
603             xw[0] = x[idx]; xw[1] = x[1+idx];
604             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605           }
606           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607           x[i2]  += xw[0]; x[i2+1] += xw[1];
608           idiag  += 4;
609           i2     += 2;
610         }
611         break;
612       case 3:
613         for (i=0; i<m; i++) {
614           v  = aa + 9*ai[i];
615           vi = aj + ai[i];
616           nz = ai[i+1] - ai[i];
617           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618           while (nz--) {
619             idx = 3*(*vi++);
620             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622             v  += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626           idiag  += 9;
627           i2     += 3;
628         }
629         break;
630       case 4:
631         for (i=0; i<m; i++) {
632           v  = aa + 16*ai[i];
633           vi = aj + ai[i];
634           nz = ai[i+1] - ai[i];
635           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636           while (nz--) {
637             idx = 4*(*vi++);
638             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640             v  += 16;
641           }
642           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644           idiag  += 16;
645           i2     += 4;
646         }
647         break;
648       case 5:
649         for (i=0; i<m; i++) {
650           v  = aa + 25*ai[i];
651           vi = aj + ai[i];
652           nz = ai[i+1] - ai[i];
653           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654           while (nz--) {
655             idx = 5*(*vi++);
656             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658             v  += 25;
659           }
660           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662           idiag  += 25;
663           i2     += 5;
664         }
665         break;
666       case 6:
667         for (i=0; i<m; i++) {
668           v  = aa + 36*ai[i];
669           vi = aj + ai[i];
670           nz = ai[i+1] - ai[i];
671           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672           while (nz--) {
673             idx = 6*(*vi++);
674             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677             v  += 36;
678           }
679           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682           idiag  += 36;
683           i2     += 6;
684         }
685         break;
686       case 7:
687         for (i=0; i<m; i++) {
688           v  = aa + 49*ai[i];
689           vi = aj + ai[i];
690           nz = ai[i+1] - ai[i];
691           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693           while (nz--) {
694             idx = 7*(*vi++);
695             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698             v  += 49;
699           }
700           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703           idiag  += 49;
704           i2     += 7;
705         }
706         break;
707       default:
708         for (i=0; i<m; i++) {
709           v  = aa + bs2*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712 
713           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
714           /* copy all rows of x that are needed into contiguous space */
715           workt = work;
716           for (j=0; j<nz; j++) {
717             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
718             workt += bs;
719           }
720           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722 
723           idiag += bs2;
724           i2    += bs;
725         }
726         break;
727       }
728       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729     }
730     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731       idiag = a->idiag+bs2*(a->mbs-1);
732       i2 = bs * (m-1);
733       switch (bs) {
734       case 1:
735         for (i=m-1; i>=0; i--) {
736           v  = aa + ai[i];
737           vi = aj + ai[i];
738           nz = ai[i+1] - ai[i];
739           s[0] = b[i2];
740           for (j=0; j<nz; j++) {
741             xw[0] = x[vi[j]];
742             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743           }
744           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745           x[i2] += xw[0];
746           idiag -= 1;
747           i2    -= 1;
748         }
749         break;
750       case 2:
751         for (i=m-1; i>=0; i--) {
752           v  = aa + 4*ai[i];
753           vi = aj + ai[i];
754           nz = ai[i+1] - ai[i];
755           s[0] = b[i2]; s[1] = b[i2+1];
756           for (j=0; j<nz; j++) {
757             idx = 2*vi[j];
758             it  = 4*j;
759             xw[0] = x[idx]; xw[1] = x[1+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761           }
762           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763           x[i2]  += xw[0]; x[i2+1] += xw[1];
764           idiag  -= 4;
765           i2     -= 2;
766         }
767         break;
768       case 3:
769         for (i=m-1; i>=0; i--) {
770           v  = aa + 9*ai[i];
771           vi = aj + ai[i];
772           nz = ai[i+1] - ai[i];
773           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774           while (nz--) {
775             idx = 3*(*vi++);
776             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778             v  += 9;
779           }
780           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782           idiag  -= 9;
783           i2     -= 3;
784         }
785         break;
786       case 4:
787         for (i=m-1; i>=0; i--) {
788           v  = aa + 16*ai[i];
789           vi = aj + ai[i];
790           nz = ai[i+1] - ai[i];
791           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792           while (nz--) {
793             idx = 4*(*vi++);
794             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796             v  += 16;
797           }
798           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800           idiag  -= 16;
801           i2     -= 4;
802         }
803         break;
804       case 5:
805         for (i=m-1; i>=0; i--) {
806           v  = aa + 25*ai[i];
807           vi = aj + ai[i];
808           nz = ai[i+1] - ai[i];
809           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810           while (nz--) {
811             idx = 5*(*vi++);
812             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814             v  += 25;
815           }
816           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818           idiag  -= 25;
819           i2     -= 5;
820         }
821         break;
822       case 6:
823         for (i=m-1; i>=0; i--) {
824           v  = aa + 36*ai[i];
825           vi = aj + ai[i];
826           nz = ai[i+1] - ai[i];
827           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828           while (nz--) {
829             idx = 6*(*vi++);
830             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833             v  += 36;
834           }
835           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838           idiag  -= 36;
839           i2     -= 6;
840         }
841         break;
842       case 7:
843         for (i=m-1; i>=0; i--) {
844           v  = aa + 49*ai[i];
845           vi = aj + ai[i];
846           nz = ai[i+1] - ai[i];
847           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849           while (nz--) {
850             idx = 7*(*vi++);
851             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854             v  += 49;
855           }
856           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859           idiag  -= 49;
860           i2     -= 7;
861         }
862         break;
863       default:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + bs2*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868 
869           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
874             workt += bs;
875           }
876           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878 
879           idiag -= bs2;
880           i2    -= bs;
881         }
882         break;
883       }
884       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885     }
886   }
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 
893 /*
894     Special version for direct calls from Fortran (Used in PETSc-fun3d)
895 */
896 #if defined(PETSC_HAVE_FORTRAN_CAPS)
897 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
898 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
899 #define matsetvaluesblocked4_ matsetvaluesblocked4
900 #endif
901 
902 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
903 {
904   Mat               A  = *AA;
905   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
906   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
907   PetscInt          *ai    =a->i,*ailen=a->ilen;
908   PetscInt          *aj    =a->j,stepval,lastcol = -1;
909   const PetscScalar *value = v;
910   MatScalar         *ap,*aa = a->a,*bap;
911   PetscErrorCode    ierr;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
952       }
953       if (N >= i) {
954         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986   PetscErrorCode ierr;
987 
988   PetscFunctionBegin;
989   for (k=0; k<m; k++) { /* loop over added rows */
990     row  = im[k]; brow = row/4;
991     rp   = aj + ai[brow];
992     ap   = aa + 16*ai[brow];
993     nrow = ailen[brow];
994     low  = 0;
995     high = nrow;
996     for (l=0; l<n; l++) { /* loop over added columns */
997       col   = in[l]; bcol = col/4;
998       ridx  = row % 4; cidx = col % 4;
999       value = v[l + k*n];
1000       if (col <= lastcol)  low = 0;
1001       else                high = nrow;
1002       lastcol = col;
1003       while (high-low > 7) {
1004         t = (low+high)/2;
1005         if (rp[t] > bcol) high = t;
1006         else              low  = t;
1007       }
1008       for (i=low; i<high; i++) {
1009         if (rp[i] > bcol) break;
1010         if (rp[i] == bcol) {
1011           bap   = ap +  16*i + 4*cidx + ridx;
1012           *bap += value;
1013           goto noinsert1;
1014         }
1015       }
1016       N = nrow++ - 1;
1017       high++; /* added new column thus must search to one higher than before */
1018       /* shift up all the later entries in this row */
1019       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1020       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1021       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1022       rp[i]                    = bcol;
1023       ap[16*i + 4*cidx + ridx] = value;
1024 noinsert1:;
1025       low = i;
1026     }
1027     ailen[brow] = nrow;
1028   }
1029   PetscFunctionReturnVoid();
1030 }
1031 
1032 /*
1033      Checks for missing diagonals
1034 */
1035 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1036 {
1037   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1038   PetscErrorCode ierr;
1039   PetscInt       *diag,*ii = a->i,i;
1040 
1041   PetscFunctionBegin;
1042   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1043   *missing = PETSC_FALSE;
1044   if (A->rmap->n > 0 && !ii) {
1045     *missing = PETSC_TRUE;
1046     if (d) *d = 0;
1047     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1048   } else {
1049     PetscInt n;
1050     n = PetscMin(a->mbs, a->nbs);
1051     diag = a->diag;
1052     for (i=0; i<n; i++) {
1053       if (diag[i] >= ii[i+1]) {
1054         *missing = PETSC_TRUE;
1055         if (d) *d = i;
1056         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1057         break;
1058       }
1059     }
1060   }
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1065 {
1066   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1067   PetscErrorCode ierr;
1068   PetscInt       i,j,m = a->mbs;
1069 
1070   PetscFunctionBegin;
1071   if (!a->diag) {
1072     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1073     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1074     a->free_diag = PETSC_TRUE;
1075   }
1076   for (i=0; i<m; i++) {
1077     a->diag[i] = a->i[i+1];
1078     for (j=a->i[i]; j<a->i[i+1]; j++) {
1079       if (a->j[j] == i) {
1080         a->diag[i] = j;
1081         break;
1082       }
1083     }
1084   }
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 
1089 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1090 {
1091   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1092   PetscErrorCode ierr;
1093   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1094   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1095 
1096   PetscFunctionBegin;
1097   *nn = n;
1098   if (!ia) PetscFunctionReturn(0);
1099   if (symmetric) {
1100     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1101     nz   = tia[n];
1102   } else {
1103     tia = a->i; tja = a->j;
1104   }
1105 
1106   if (!blockcompressed && bs > 1) {
1107     (*nn) *= bs;
1108     /* malloc & create the natural set of indices */
1109     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1110     if (n) {
1111       (*ia)[0] = oshift;
1112       for (j=1; j<bs; j++) {
1113         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1114       }
1115     }
1116 
1117     for (i=1; i<n; i++) {
1118       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1119       for (j=1; j<bs; j++) {
1120         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1121       }
1122     }
1123     if (n) {
1124       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1125     }
1126 
1127     if (inja) {
1128       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1129       cnt = 0;
1130       for (i=0; i<n; i++) {
1131         for (j=0; j<bs; j++) {
1132           for (k=tia[i]; k<tia[i+1]; k++) {
1133             for (l=0; l<bs; l++) {
1134               (*ja)[cnt++] = bs*tja[k] + l;
1135             }
1136           }
1137         }
1138       }
1139     }
1140 
1141     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1142       ierr = PetscFree(tia);CHKERRQ(ierr);
1143       ierr = PetscFree(tja);CHKERRQ(ierr);
1144     }
1145   } else if (oshift == 1) {
1146     if (symmetric) {
1147       nz = tia[A->rmap->n/bs];
1148       /*  add 1 to i and j indices */
1149       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1150       *ia = tia;
1151       if (ja) {
1152         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1153         *ja = tja;
1154       }
1155     } else {
1156       nz = a->i[A->rmap->n/bs];
1157       /* malloc space and  add 1 to i and j indices */
1158       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1159       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1160       if (ja) {
1161         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1162         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1163       }
1164     }
1165   } else {
1166     *ia = tia;
1167     if (ja) *ja = tja;
1168   }
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1173 {
1174   PetscErrorCode ierr;
1175 
1176   PetscFunctionBegin;
1177   if (!ia) PetscFunctionReturn(0);
1178   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1179     ierr = PetscFree(*ia);CHKERRQ(ierr);
1180     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1186 {
1187   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1188   PetscErrorCode ierr;
1189 
1190   PetscFunctionBegin;
1191 #if defined(PETSC_USE_LOG)
1192   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1193 #endif
1194   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1197   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1198   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1199   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1200   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1204   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1205   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1206   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1207 
1208   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1209   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1210   ierr = PetscFree(A->data);CHKERRQ(ierr);
1211 
1212   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1225 #if defined(PETSC_HAVE_HYPRE)
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1227 #endif
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = flg;
1241     break;
1242   case MAT_KEEP_NONZERO_PATTERN:
1243     a->keepnonzeropattern = flg;
1244     break;
1245   case MAT_NEW_NONZERO_LOCATIONS:
1246     a->nonew = (flg ? 0 : 1);
1247     break;
1248   case MAT_NEW_NONZERO_LOCATION_ERR:
1249     a->nonew = (flg ? -1 : 0);
1250     break;
1251   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252     a->nonew = (flg ? -2 : 0);
1253     break;
1254   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255     a->nounused = (flg ? -1 : 0);
1256     break;
1257   case MAT_FORCE_DIAGONAL_ENTRIES:
1258   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259   case MAT_USE_HASH_TABLE:
1260   case MAT_SORTED_FULL:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268   case MAT_SUBMAT_SINGLEIS:
1269   case MAT_STRUCTURE_ONLY:
1270     /* These options are handled directly by MatSetOption() */
1271     break;
1272   default:
1273     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280 {
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283   MatScalar      *aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   bs2 = bs*bs;
1289   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1290 
1291   bn  = row/bs;   /* Block number */
1292   bp  = row % bs; /* Block Position */
1293   M   = ai[bn+1] - ai[bn];
1294   *nz = bs*M;
1295 
1296   if (v) {
1297     *v = NULL;
1298     if (*nz) {
1299       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1300       for (i=0; i<M; i++) { /* for each block in the block row */
1301         v_i  = *v + i*bs;
1302         aa_i = aa + bs2*(ai[bn] + i);
1303         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304       }
1305     }
1306   }
1307 
1308   if (idx) {
1309     *idx = NULL;
1310     if (*nz) {
1311       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1312       for (i=0; i<M; i++) { /* for each block in the block row */
1313         idx_i = *idx + i*bs;
1314         itmp  = bs*aj[ai[bn] + i];
1315         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   if (nz)  *nz = 0;
1338   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1339   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1344 {
1345   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1346   Mat            C;
1347   PetscErrorCode ierr;
1348   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1349   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1350   MatScalar      *ata,*aa=a->a;
1351 
1352   PetscFunctionBegin;
1353   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1354   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1355     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1356 
1357     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1358     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1359     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1360     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1361 
1362     at  = (Mat_SeqBAIJ*)C->data;
1363     ati = at->i;
1364     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1365   } else {
1366     C = *B;
1367     at = (Mat_SeqBAIJ*)C->data;
1368     ati = at->i;
1369   }
1370 
1371   atj = at->j;
1372   ata = at->a;
1373 
1374   /* Copy ati into atfill so we have locations of the next free space in atj */
1375   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1376 
1377   /* Walk through A row-wise and mark nonzero entries of A^T. */
1378   for (i=0; i<mbs; i++) {
1379     anzj = ai[i+1] - ai[i];
1380     for (j=0; j<anzj; j++) {
1381       atj[atfill[*aj]] = i;
1382       for (kr=0; kr<bs; kr++) {
1383         for (k=0; k<bs; k++) {
1384           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1385         }
1386       }
1387       atfill[*aj++] += 1;
1388     }
1389   }
1390   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1392 
1393   /* Clean up temporary space and complete requests. */
1394   ierr = PetscFree(atfill);CHKERRQ(ierr);
1395 
1396   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1397     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1398     *B = C;
1399   } else {
1400     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1406 {
1407   PetscErrorCode ierr;
1408   Mat            Btrans;
1409 
1410   PetscFunctionBegin;
1411   *f   = PETSC_FALSE;
1412   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1413   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1414   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1419 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1420 {
1421   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1422   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1423   PetscInt       *rowlens,*colidxs;
1424   PetscScalar    *matvals;
1425   PetscErrorCode ierr;
1426 
1427   PetscFunctionBegin;
1428   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1429 
1430   M  = mat->rmap->N;
1431   N  = mat->cmap->N;
1432   m  = mat->rmap->n;
1433   bs = mat->rmap->bs;
1434   nz = bs*bs*A->nz;
1435 
1436   /* write matrix header */
1437   header[0] = MAT_FILE_CLASSID;
1438   header[1] = M; header[2] = N; header[3] = nz;
1439   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1440 
1441   /* store row lengths */
1442   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1443   for (cnt=0, i=0; i<A->mbs; i++)
1444     for (j=0; j<bs; j++)
1445       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1446   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1447   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1448 
1449   /* store column indices  */
1450   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1451   for (cnt=0, i=0; i<A->mbs; i++)
1452     for (k=0; k<bs; k++)
1453       for (j=A->i[i]; j<A->i[i+1]; j++)
1454         for (l=0; l<bs; l++)
1455           colidxs[cnt++] = bs*A->j[j] + l;
1456   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1457   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1458   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1459 
1460   /* store nonzero values */
1461   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1462   for (cnt=0, i=0; i<A->mbs; i++)
1463     for (k=0; k<bs; k++)
1464       for (j=A->i[i]; j<A->i[i+1]; j++)
1465         for (l=0; l<bs; l++)
1466           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1467   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1468   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1469   ierr = PetscFree(matvals);CHKERRQ(ierr);
1470 
1471   /* write block size option to the viewer's .info file */
1472   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1473   PetscFunctionReturn(0);
1474 }
1475 
1476 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1477 {
1478   PetscErrorCode ierr;
1479   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1480   PetscInt       i,bs = A->rmap->bs,k;
1481 
1482   PetscFunctionBegin;
1483   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1484   for (i=0; i<a->mbs; i++) {
1485     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1486     for (k=a->i[i]; k<a->i[i+1]; k++) {
1487       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1488     }
1489     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1490   }
1491   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1496 {
1497   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1498   PetscErrorCode    ierr;
1499   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1500   PetscViewerFormat format;
1501 
1502   PetscFunctionBegin;
1503   if (A->structure_only) {
1504     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1505     PetscFunctionReturn(0);
1506   }
1507 
1508   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1509   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1510     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1511   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1512     const char *matname;
1513     Mat        aij;
1514     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1515     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1516     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1517     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1518     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1519   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1520       PetscFunctionReturn(0);
1521   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1522     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1523     for (i=0; i<a->mbs; i++) {
1524       for (j=0; j<bs; j++) {
1525         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1526         for (k=a->i[i]; k<a->i[i+1]; k++) {
1527           for (l=0; l<bs; l++) {
1528 #if defined(PETSC_USE_COMPLEX)
1529             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1530               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1531                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1532             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1533               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1534                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             }
1538 #else
1539             if (a->a[bs2*k + l*bs + j] != 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1541             }
1542 #endif
1543           }
1544         }
1545         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1546       }
1547     }
1548     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1549   } else {
1550     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1551     for (i=0; i<a->mbs; i++) {
1552       for (j=0; j<bs; j++) {
1553         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1554         for (k=a->i[i]; k<a->i[i+1]; k++) {
1555           for (l=0; l<bs; l++) {
1556 #if defined(PETSC_USE_COMPLEX)
1557             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1558               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1559                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1560             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1561               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1562                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             } else {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             }
1566 #else
1567             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1568 #endif
1569           }
1570         }
1571         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1572       }
1573     }
1574     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1575   }
1576   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1577   PetscFunctionReturn(0);
1578 }
1579 
1580 #include <petscdraw.h>
1581 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1582 {
1583   Mat               A = (Mat) Aa;
1584   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1585   PetscErrorCode    ierr;
1586   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1587   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1588   MatScalar         *aa;
1589   PetscViewer       viewer;
1590   PetscViewerFormat format;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1594   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1595   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1596 
1597   /* loop over matrix elements drawing boxes */
1598 
1599   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1600     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1601     /* Blue for negative, Cyan for zero and  Red for positive */
1602     color = PETSC_DRAW_BLUE;
1603     for (i=0,row=0; i<mbs; i++,row+=bs) {
1604       for (j=a->i[i]; j<a->i[i+1]; j++) {
1605         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1606         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1607         aa  = a->a + j*bs2;
1608         for (k=0; k<bs; k++) {
1609           for (l=0; l<bs; l++) {
1610             if (PetscRealPart(*aa++) >=  0.) continue;
1611             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1612           }
1613         }
1614       }
1615     }
1616     color = PETSC_DRAW_CYAN;
1617     for (i=0,row=0; i<mbs; i++,row+=bs) {
1618       for (j=a->i[i]; j<a->i[i+1]; j++) {
1619         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1620         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1621         aa  = a->a + j*bs2;
1622         for (k=0; k<bs; k++) {
1623           for (l=0; l<bs; l++) {
1624             if (PetscRealPart(*aa++) != 0.) continue;
1625             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1626           }
1627         }
1628       }
1629     }
1630     color = PETSC_DRAW_RED;
1631     for (i=0,row=0; i<mbs; i++,row+=bs) {
1632       for (j=a->i[i]; j<a->i[i+1]; j++) {
1633         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1634         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1635         aa  = a->a + j*bs2;
1636         for (k=0; k<bs; k++) {
1637           for (l=0; l<bs; l++) {
1638             if (PetscRealPart(*aa++) <= 0.) continue;
1639             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1640           }
1641         }
1642       }
1643     }
1644     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1645   } else {
1646     /* use contour shading to indicate magnitude of values */
1647     /* first determine max of all nonzero values */
1648     PetscReal minv = 0.0, maxv = 0.0;
1649     PetscDraw popup;
1650 
1651     for (i=0; i<a->nz*a->bs2; i++) {
1652       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1653     }
1654     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1655     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1656     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1657 
1658     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1659     for (i=0,row=0; i<mbs; i++,row+=bs) {
1660       for (j=a->i[i]; j<a->i[i+1]; j++) {
1661         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1662         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1663         aa  = a->a + j*bs2;
1664         for (k=0; k<bs; k++) {
1665           for (l=0; l<bs; l++) {
1666             MatScalar v = *aa++;
1667             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1668             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1669           }
1670         }
1671       }
1672     }
1673     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 
1678 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1679 {
1680   PetscErrorCode ierr;
1681   PetscReal      xl,yl,xr,yr,w,h;
1682   PetscDraw      draw;
1683   PetscBool      isnull;
1684 
1685   PetscFunctionBegin;
1686   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1687   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1688   if (isnull) PetscFunctionReturn(0);
1689 
1690   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1691   xr  += w;          yr += h;        xl = -w;     yl = -h;
1692   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1693   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1694   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1695   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1696   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 
1700 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1701 {
1702   PetscErrorCode ierr;
1703   PetscBool      iascii,isbinary,isdraw;
1704 
1705   PetscFunctionBegin;
1706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1707   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1708   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1709   if (iascii) {
1710     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1711   } else if (isbinary) {
1712     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1713   } else if (isdraw) {
1714     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1715   } else {
1716     Mat B;
1717     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1718     ierr = MatView(B,viewer);CHKERRQ(ierr);
1719     ierr = MatDestroy(&B);CHKERRQ(ierr);
1720   }
1721   PetscFunctionReturn(0);
1722 }
1723 
1724 
1725 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1726 {
1727   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1728   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1729   PetscInt    *ai = a->i,*ailen = a->ilen;
1730   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1731   MatScalar   *ap,*aa = a->a;
1732 
1733   PetscFunctionBegin;
1734   for (k=0; k<m; k++) { /* loop over rows */
1735     row = im[k]; brow = row/bs;
1736     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1737     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1738     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1739     nrow = ailen[brow];
1740     for (l=0; l<n; l++) { /* loop over columns */
1741       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1742       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1743       col  = in[l];
1744       bcol = col/bs;
1745       cidx = col%bs;
1746       ridx = row%bs;
1747       high = nrow;
1748       low  = 0; /* assume unsorted */
1749       while (high-low > 5) {
1750         t = (low+high)/2;
1751         if (rp[t] > bcol) high = t;
1752         else             low  = t;
1753       }
1754       for (i=low; i<high; i++) {
1755         if (rp[i] > bcol) break;
1756         if (rp[i] == bcol) {
1757           *v++ = ap[bs2*i+bs*cidx+ridx];
1758           goto finished;
1759         }
1760       }
1761       *v++ = 0.0;
1762 finished:;
1763     }
1764   }
1765   PetscFunctionReturn(0);
1766 }
1767 
1768 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1769 {
1770   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1771   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1772   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1773   PetscErrorCode    ierr;
1774   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1775   PetscBool         roworiented=a->roworiented;
1776   const PetscScalar *value     = v;
1777   MatScalar         *ap=NULL,*aa = a->a,*bap;
1778 
1779   PetscFunctionBegin;
1780   if (roworiented) {
1781     stepval = (n-1)*bs;
1782   } else {
1783     stepval = (m-1)*bs;
1784   }
1785   for (k=0; k<m; k++) { /* loop over added rows */
1786     row = im[k];
1787     if (row < 0) continue;
1788     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1789     rp   = aj + ai[row];
1790     if (!A->structure_only) ap = aa + bs2*ai[row];
1791     rmax = imax[row];
1792     nrow = ailen[row];
1793     low  = 0;
1794     high = nrow;
1795     for (l=0; l<n; l++) { /* loop over added columns */
1796       if (in[l] < 0) continue;
1797       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1798       col = in[l];
1799       if (!A->structure_only) {
1800         if (roworiented) {
1801           value = v + (k*(stepval+bs) + l)*bs;
1802         } else {
1803           value = v + (l*(stepval+bs) + k)*bs;
1804         }
1805       }
1806       if (col <= lastcol) low = 0;
1807       else high = nrow;
1808       lastcol = col;
1809       while (high-low > 7) {
1810         t = (low+high)/2;
1811         if (rp[t] > col) high = t;
1812         else             low  = t;
1813       }
1814       for (i=low; i<high; i++) {
1815         if (rp[i] > col) break;
1816         if (rp[i] == col) {
1817           if (A->structure_only) goto noinsert2;
1818           bap = ap +  bs2*i;
1819           if (roworiented) {
1820             if (is == ADD_VALUES) {
1821               for (ii=0; ii<bs; ii++,value+=stepval) {
1822                 for (jj=ii; jj<bs2; jj+=bs) {
1823                   bap[jj] += *value++;
1824                 }
1825               }
1826             } else {
1827               for (ii=0; ii<bs; ii++,value+=stepval) {
1828                 for (jj=ii; jj<bs2; jj+=bs) {
1829                   bap[jj] = *value++;
1830                 }
1831               }
1832             }
1833           } else {
1834             if (is == ADD_VALUES) {
1835               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1836                 for (jj=0; jj<bs; jj++) {
1837                   bap[jj] += value[jj];
1838                 }
1839                 bap += bs;
1840               }
1841             } else {
1842               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1843                 for (jj=0; jj<bs; jj++) {
1844                   bap[jj]  = value[jj];
1845                 }
1846                 bap += bs;
1847               }
1848             }
1849           }
1850           goto noinsert2;
1851         }
1852       }
1853       if (nonew == 1) goto noinsert2;
1854       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1855       if (A->structure_only) {
1856         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1857       } else {
1858         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1859       }
1860       N = nrow++ - 1; high++;
1861       /* shift up all the later entries in this row */
1862       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1863       rp[i] = col;
1864       if (!A->structure_only) {
1865         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1866         bap   = ap +  bs2*i;
1867         if (roworiented) {
1868           for (ii=0; ii<bs; ii++,value+=stepval) {
1869             for (jj=ii; jj<bs2; jj+=bs) {
1870               bap[jj] = *value++;
1871             }
1872           }
1873         } else {
1874           for (ii=0; ii<bs; ii++,value+=stepval) {
1875             for (jj=0; jj<bs; jj++) {
1876               *bap++ = *value++;
1877             }
1878           }
1879         }
1880       }
1881 noinsert2:;
1882       low = i;
1883     }
1884     ailen[row] = nrow;
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1890 {
1891   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1892   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1893   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1894   PetscErrorCode ierr;
1895   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1896   MatScalar      *aa  = a->a,*ap;
1897   PetscReal      ratio=0.6;
1898 
1899   PetscFunctionBegin;
1900   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1901 
1902   if (m) rmax = ailen[0];
1903   for (i=1; i<mbs; i++) {
1904     /* move each row back by the amount of empty slots (fshift) before it*/
1905     fshift += imax[i-1] - ailen[i-1];
1906     rmax    = PetscMax(rmax,ailen[i]);
1907     if (fshift) {
1908       ip = aj + ai[i];
1909       ap = aa + bs2*ai[i];
1910       N  = ailen[i];
1911       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1912       if (!A->structure_only) {
1913         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1914       }
1915     }
1916     ai[i] = ai[i-1] + ailen[i-1];
1917   }
1918   if (mbs) {
1919     fshift += imax[mbs-1] - ailen[mbs-1];
1920     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1921   }
1922 
1923   /* reset ilen and imax for each row */
1924   a->nonzerorowcnt = 0;
1925   if (A->structure_only) {
1926     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1927   } else { /* !A->structure_only */
1928     for (i=0; i<mbs; i++) {
1929       ailen[i] = imax[i] = ai[i+1] - ai[i];
1930       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1931     }
1932   }
1933   a->nz = ai[mbs];
1934 
1935   /* diagonals may have moved, so kill the diagonal pointers */
1936   a->idiagvalid = PETSC_FALSE;
1937   if (fshift && a->diag) {
1938     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1939     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1940     a->diag = NULL;
1941   }
1942   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1943   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1944   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1945   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1946 
1947   A->info.mallocs    += a->reallocs;
1948   a->reallocs         = 0;
1949   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1950   a->rmax             = rmax;
1951 
1952   if (!A->structure_only) {
1953     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1954   }
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 /*
1959    This function returns an array of flags which indicate the locations of contiguous
1960    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1961    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1962    Assume: sizes should be long enough to hold all the values.
1963 */
1964 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1965 {
1966   PetscInt  i,j,k,row;
1967   PetscBool flg;
1968 
1969   PetscFunctionBegin;
1970   for (i=0,j=0; i<n; j++) {
1971     row = idx[i];
1972     if (row%bs!=0) { /* Not the begining of a block */
1973       sizes[j] = 1;
1974       i++;
1975     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1976       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1977       i++;
1978     } else { /* Begining of the block, so check if the complete block exists */
1979       flg = PETSC_TRUE;
1980       for (k=1; k<bs; k++) {
1981         if (row+k != idx[i+k]) { /* break in the block */
1982           flg = PETSC_FALSE;
1983           break;
1984         }
1985       }
1986       if (flg) { /* No break in the bs */
1987         sizes[j] = bs;
1988         i       += bs;
1989       } else {
1990         sizes[j] = 1;
1991         i++;
1992       }
1993     }
1994   }
1995   *bs_max = j;
1996   PetscFunctionReturn(0);
1997 }
1998 
1999 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2000 {
2001   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2002   PetscErrorCode    ierr;
2003   PetscInt          i,j,k,count,*rows;
2004   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2005   PetscScalar       zero = 0.0;
2006   MatScalar         *aa;
2007   const PetscScalar *xx;
2008   PetscScalar       *bb;
2009 
2010   PetscFunctionBegin;
2011   /* fix right hand side if needed */
2012   if (x && b) {
2013     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2014     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2015     for (i=0; i<is_n; i++) {
2016       bb[is_idx[i]] = diag*xx[is_idx[i]];
2017     }
2018     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2019     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2020   }
2021 
2022   /* Make a copy of the IS and  sort it */
2023   /* allocate memory for rows,sizes */
2024   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2025 
2026   /* copy IS values to rows, and sort them */
2027   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2028   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2029 
2030   if (baij->keepnonzeropattern) {
2031     for (i=0; i<is_n; i++) sizes[i] = 1;
2032     bs_max          = is_n;
2033   } else {
2034     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2035     A->nonzerostate++;
2036   }
2037 
2038   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2039     row = rows[j];
2040     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2041     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2042     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2043     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2044       if (diag != (PetscScalar)0.0) {
2045         if (baij->ilen[row/bs] > 0) {
2046           baij->ilen[row/bs]       = 1;
2047           baij->j[baij->i[row/bs]] = row/bs;
2048 
2049           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2050         }
2051         /* Now insert all the diagonal values for this bs */
2052         for (k=0; k<bs; k++) {
2053           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2054         }
2055       } else { /* (diag == 0.0) */
2056         baij->ilen[row/bs] = 0;
2057       } /* end (diag == 0.0) */
2058     } else { /* (sizes[i] != bs) */
2059       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2060       for (k=0; k<count; k++) {
2061         aa[0] =  zero;
2062         aa   += bs;
2063       }
2064       if (diag != (PetscScalar)0.0) {
2065         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2066       }
2067     }
2068   }
2069 
2070   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2071   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2076 {
2077   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2078   PetscErrorCode    ierr;
2079   PetscInt          i,j,k,count;
2080   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2081   PetscScalar       zero = 0.0;
2082   MatScalar         *aa;
2083   const PetscScalar *xx;
2084   PetscScalar       *bb;
2085   PetscBool         *zeroed,vecs = PETSC_FALSE;
2086 
2087   PetscFunctionBegin;
2088   /* fix right hand side if needed */
2089   if (x && b) {
2090     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2091     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2092     vecs = PETSC_TRUE;
2093   }
2094 
2095   /* zero the columns */
2096   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2097   for (i=0; i<is_n; i++) {
2098     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2099     zeroed[is_idx[i]] = PETSC_TRUE;
2100   }
2101   for (i=0; i<A->rmap->N; i++) {
2102     if (!zeroed[i]) {
2103       row = i/bs;
2104       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2105         for (k=0; k<bs; k++) {
2106           col = bs*baij->j[j] + k;
2107           if (zeroed[col]) {
2108             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2109             if (vecs) bb[i] -= aa[0]*xx[col];
2110             aa[0] = 0.0;
2111           }
2112         }
2113       }
2114     } else if (vecs) bb[i] = diag*xx[i];
2115   }
2116   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2117   if (vecs) {
2118     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2119     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2120   }
2121 
2122   /* zero the rows */
2123   for (i=0; i<is_n; i++) {
2124     row   = is_idx[i];
2125     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2126     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2127     for (k=0; k<count; k++) {
2128       aa[0] =  zero;
2129       aa   += bs;
2130     }
2131     if (diag != (PetscScalar)0.0) {
2132       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2133     }
2134   }
2135   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2136   PetscFunctionReturn(0);
2137 }
2138 
2139 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2140 {
2141   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2142   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2143   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2144   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2145   PetscErrorCode ierr;
2146   PetscInt       ridx,cidx,bs2=a->bs2;
2147   PetscBool      roworiented=a->roworiented;
2148   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2149 
2150   PetscFunctionBegin;
2151   for (k=0; k<m; k++) { /* loop over added rows */
2152     row  = im[k];
2153     brow = row/bs;
2154     if (row < 0) continue;
2155     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2156     rp   = aj + ai[brow];
2157     if (!A->structure_only) ap = aa + bs2*ai[brow];
2158     rmax = imax[brow];
2159     nrow = ailen[brow];
2160     low  = 0;
2161     high = nrow;
2162     for (l=0; l<n; l++) { /* loop over added columns */
2163       if (in[l] < 0) continue;
2164       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2165       col  = in[l]; bcol = col/bs;
2166       ridx = row % bs; cidx = col % bs;
2167       if (!A->structure_only) {
2168         if (roworiented) {
2169           value = v[l + k*n];
2170         } else {
2171           value = v[k + l*m];
2172         }
2173       }
2174       if (col <= lastcol) low = 0; else high = nrow;
2175       lastcol = col;
2176       while (high-low > 7) {
2177         t = (low+high)/2;
2178         if (rp[t] > bcol) high = t;
2179         else              low  = t;
2180       }
2181       for (i=low; i<high; i++) {
2182         if (rp[i] > bcol) break;
2183         if (rp[i] == bcol) {
2184           bap = ap +  bs2*i + bs*cidx + ridx;
2185           if (!A->structure_only) {
2186             if (is == ADD_VALUES) *bap += value;
2187             else                  *bap  = value;
2188           }
2189           goto noinsert1;
2190         }
2191       }
2192       if (nonew == 1) goto noinsert1;
2193       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2194       if (A->structure_only) {
2195         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2196       } else {
2197         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2198       }
2199       N = nrow++ - 1; high++;
2200       /* shift up all the later entries in this row */
2201       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2202       rp[i] = bcol;
2203       if (!A->structure_only) {
2204         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2205         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2206         ap[bs2*i + bs*cidx + ridx] = value;
2207       }
2208       a->nz++;
2209       A->nonzerostate++;
2210 noinsert1:;
2211       low = i;
2212     }
2213     ailen[brow] = nrow;
2214   }
2215   PetscFunctionReturn(0);
2216 }
2217 
2218 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2219 {
2220   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2221   Mat            outA;
2222   PetscErrorCode ierr;
2223   PetscBool      row_identity,col_identity;
2224 
2225   PetscFunctionBegin;
2226   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2227   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2228   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2229   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2230 
2231   outA            = inA;
2232   inA->factortype = MAT_FACTOR_LU;
2233   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2234   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2235 
2236   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2237 
2238   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2239   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2240   a->row = row;
2241   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2242   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2243   a->col = col;
2244 
2245   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2246   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2247   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2248   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2249 
2250   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2251   if (!a->solve_work) {
2252     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2253     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2254   }
2255   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2256   PetscFunctionReturn(0);
2257 }
2258 
2259 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2260 {
2261   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2262   PetscInt    i,nz,mbs;
2263 
2264   PetscFunctionBegin;
2265   nz  = baij->maxnz;
2266   mbs = baij->mbs;
2267   for (i=0; i<nz; i++) {
2268     baij->j[i] = indices[i];
2269   }
2270   baij->nz = nz;
2271   for (i=0; i<mbs; i++) {
2272     baij->ilen[i] = baij->imax[i];
2273   }
2274   PetscFunctionReturn(0);
2275 }
2276 
2277 /*@
2278     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2279        in the matrix.
2280 
2281   Input Parameters:
2282 +  mat - the SeqBAIJ matrix
2283 -  indices - the column indices
2284 
2285   Level: advanced
2286 
2287   Notes:
2288     This can be called if you have precomputed the nonzero structure of the
2289   matrix and want to provide it to the matrix object to improve the performance
2290   of the MatSetValues() operation.
2291 
2292     You MUST have set the correct numbers of nonzeros per row in the call to
2293   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2294 
2295     MUST be called before any calls to MatSetValues();
2296 
2297 @*/
2298 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2299 {
2300   PetscErrorCode ierr;
2301 
2302   PetscFunctionBegin;
2303   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2304   PetscValidPointer(indices,2);
2305   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2310 {
2311   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2312   PetscErrorCode ierr;
2313   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2314   PetscReal      atmp;
2315   PetscScalar    *x,zero = 0.0;
2316   MatScalar      *aa;
2317   PetscInt       ncols,brow,krow,kcol;
2318 
2319   PetscFunctionBegin;
2320   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2321   bs  = A->rmap->bs;
2322   aa  = a->a;
2323   ai  = a->i;
2324   aj  = a->j;
2325   mbs = a->mbs;
2326 
2327   ierr = VecSet(v,zero);CHKERRQ(ierr);
2328   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2329   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2330   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2331   for (i=0; i<mbs; i++) {
2332     ncols = ai[1] - ai[0]; ai++;
2333     brow  = bs*i;
2334     for (j=0; j<ncols; j++) {
2335       for (kcol=0; kcol<bs; kcol++) {
2336         for (krow=0; krow<bs; krow++) {
2337           atmp = PetscAbsScalar(*aa);aa++;
2338           row  = brow + krow;   /* row index */
2339           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2340         }
2341       }
2342       aj++;
2343     }
2344   }
2345   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2350 {
2351   PetscErrorCode ierr;
2352 
2353   PetscFunctionBegin;
2354   /* If the two matrices have the same copy implementation, use fast copy. */
2355   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2356     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2357     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2358     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2359 
2360     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2361     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2362     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2363     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2364   } else {
2365     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2366   }
2367   PetscFunctionReturn(0);
2368 }
2369 
2370 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2371 {
2372   PetscErrorCode ierr;
2373 
2374   PetscFunctionBegin;
2375   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2376   PetscFunctionReturn(0);
2377 }
2378 
2379 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2380 {
2381   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2382 
2383   PetscFunctionBegin;
2384   *array = a->a;
2385   PetscFunctionReturn(0);
2386 }
2387 
2388 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2389 {
2390   PetscFunctionBegin;
2391   *array = NULL;
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2396 {
2397   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2398   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2399   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   /* Set the number of nonzeros in the new matrix */
2404   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2409 {
2410   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2411   PetscErrorCode ierr;
2412   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2413   PetscBLASInt   one=1;
2414 
2415   PetscFunctionBegin;
2416   if (str == SAME_NONZERO_PATTERN) {
2417     PetscScalar  alpha = a;
2418     PetscBLASInt bnz;
2419     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2420     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2421     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2422   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2423     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2424   } else {
2425     Mat      B;
2426     PetscInt *nnz;
2427     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2428     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2429     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2430     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2431     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2432     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2433     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2434     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2435     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2436     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2437     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2438     ierr = PetscFree(nnz);CHKERRQ(ierr);
2439   }
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2444 {
2445 #if defined(PETSC_USE_COMPLEX)
2446   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2447   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2448   MatScalar   *aa = a->a;
2449 
2450   PetscFunctionBegin;
2451   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2452 #else
2453   PetscFunctionBegin;
2454 #endif
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2459 {
2460   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2461   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2462   MatScalar   *aa = a->a;
2463 
2464   PetscFunctionBegin;
2465   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 /*
2481     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2482 */
2483 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2484 {
2485   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2486   PetscErrorCode ierr;
2487   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2488   PetscInt       nz = a->i[m],row,*jj,mr,col;
2489 
2490   PetscFunctionBegin;
2491   *nn = n;
2492   if (!ia) PetscFunctionReturn(0);
2493   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2494   else {
2495     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2496     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2497     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2498     jj   = a->j;
2499     for (i=0; i<nz; i++) {
2500       collengths[jj[i]]++;
2501     }
2502     cia[0] = oshift;
2503     for (i=0; i<n; i++) {
2504       cia[i+1] = cia[i] + collengths[i];
2505     }
2506     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2507     jj   = a->j;
2508     for (row=0; row<m; row++) {
2509       mr = a->i[row+1] - a->i[row];
2510       for (i=0; i<mr; i++) {
2511         col = *jj++;
2512 
2513         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2514       }
2515     }
2516     ierr = PetscFree(collengths);CHKERRQ(ierr);
2517     *ia  = cia; *ja = cja;
2518   }
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2523 {
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   if (!ia) PetscFunctionReturn(0);
2528   ierr = PetscFree(*ia);CHKERRQ(ierr);
2529   ierr = PetscFree(*ja);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 /*
2534  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2535  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2536  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2537  */
2538 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2539 {
2540   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2541   PetscErrorCode ierr;
2542   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2543   PetscInt       nz = a->i[m],row,*jj,mr,col;
2544   PetscInt       *cspidx;
2545 
2546   PetscFunctionBegin;
2547   *nn = n;
2548   if (!ia) PetscFunctionReturn(0);
2549 
2550   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2551   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2552   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2553   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2554   jj   = a->j;
2555   for (i=0; i<nz; i++) {
2556     collengths[jj[i]]++;
2557   }
2558   cia[0] = oshift;
2559   for (i=0; i<n; i++) {
2560     cia[i+1] = cia[i] + collengths[i];
2561   }
2562   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2563   jj   = a->j;
2564   for (row=0; row<m; row++) {
2565     mr = a->i[row+1] - a->i[row];
2566     for (i=0; i<mr; i++) {
2567       col = *jj++;
2568       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2569       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2570     }
2571   }
2572   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2573   *ia    = cia;
2574   *ja    = cja;
2575   *spidx = cspidx;
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2580 {
2581   PetscErrorCode ierr;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2585   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2590 {
2591   PetscErrorCode ierr;
2592   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2593 
2594   PetscFunctionBegin;
2595   if (!Y->preallocated || !aij->nz) {
2596     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2597   }
2598   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 /* -------------------------------------------------------------------*/
2603 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2604                                        MatGetRow_SeqBAIJ,
2605                                        MatRestoreRow_SeqBAIJ,
2606                                        MatMult_SeqBAIJ_N,
2607                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2608                                        MatMultTranspose_SeqBAIJ,
2609                                        MatMultTransposeAdd_SeqBAIJ,
2610                                        NULL,
2611                                        NULL,
2612                                        NULL,
2613                                /* 10*/ NULL,
2614                                        MatLUFactor_SeqBAIJ,
2615                                        NULL,
2616                                        NULL,
2617                                        MatTranspose_SeqBAIJ,
2618                                /* 15*/ MatGetInfo_SeqBAIJ,
2619                                        MatEqual_SeqBAIJ,
2620                                        MatGetDiagonal_SeqBAIJ,
2621                                        MatDiagonalScale_SeqBAIJ,
2622                                        MatNorm_SeqBAIJ,
2623                                /* 20*/ NULL,
2624                                        MatAssemblyEnd_SeqBAIJ,
2625                                        MatSetOption_SeqBAIJ,
2626                                        MatZeroEntries_SeqBAIJ,
2627                                /* 24*/ MatZeroRows_SeqBAIJ,
2628                                        NULL,
2629                                        NULL,
2630                                        NULL,
2631                                        NULL,
2632                                /* 29*/ MatSetUp_SeqBAIJ,
2633                                        NULL,
2634                                        NULL,
2635                                        NULL,
2636                                        NULL,
2637                                /* 34*/ MatDuplicate_SeqBAIJ,
2638                                        NULL,
2639                                        NULL,
2640                                        MatILUFactor_SeqBAIJ,
2641                                        NULL,
2642                                /* 39*/ MatAXPY_SeqBAIJ,
2643                                        MatCreateSubMatrices_SeqBAIJ,
2644                                        MatIncreaseOverlap_SeqBAIJ,
2645                                        MatGetValues_SeqBAIJ,
2646                                        MatCopy_SeqBAIJ,
2647                                /* 44*/ NULL,
2648                                        MatScale_SeqBAIJ,
2649                                        MatShift_SeqBAIJ,
2650                                        NULL,
2651                                        MatZeroRowsColumns_SeqBAIJ,
2652                                /* 49*/ NULL,
2653                                        MatGetRowIJ_SeqBAIJ,
2654                                        MatRestoreRowIJ_SeqBAIJ,
2655                                        MatGetColumnIJ_SeqBAIJ,
2656                                        MatRestoreColumnIJ_SeqBAIJ,
2657                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2658                                        NULL,
2659                                        NULL,
2660                                        NULL,
2661                                        MatSetValuesBlocked_SeqBAIJ,
2662                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2663                                        MatDestroy_SeqBAIJ,
2664                                        MatView_SeqBAIJ,
2665                                        NULL,
2666                                        NULL,
2667                                /* 64*/ NULL,
2668                                        NULL,
2669                                        NULL,
2670                                        NULL,
2671                                        NULL,
2672                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2673                                        NULL,
2674                                        MatConvert_Basic,
2675                                        NULL,
2676                                        NULL,
2677                                /* 74*/ NULL,
2678                                        MatFDColoringApply_BAIJ,
2679                                        NULL,
2680                                        NULL,
2681                                        NULL,
2682                                /* 79*/ NULL,
2683                                        NULL,
2684                                        NULL,
2685                                        NULL,
2686                                        MatLoad_SeqBAIJ,
2687                                /* 84*/ NULL,
2688                                        NULL,
2689                                        NULL,
2690                                        NULL,
2691                                        NULL,
2692                                /* 89*/ NULL,
2693                                        NULL,
2694                                        NULL,
2695                                        NULL,
2696                                        NULL,
2697                                /* 94*/ NULL,
2698                                        NULL,
2699                                        NULL,
2700                                        NULL,
2701                                        NULL,
2702                                /* 99*/ NULL,
2703                                        NULL,
2704                                        NULL,
2705                                        MatConjugate_SeqBAIJ,
2706                                        NULL,
2707                                /*104*/ NULL,
2708                                        MatRealPart_SeqBAIJ,
2709                                        MatImaginaryPart_SeqBAIJ,
2710                                        NULL,
2711                                        NULL,
2712                                /*109*/ NULL,
2713                                        NULL,
2714                                        NULL,
2715                                        NULL,
2716                                        MatMissingDiagonal_SeqBAIJ,
2717                                /*114*/ NULL,
2718                                        NULL,
2719                                        NULL,
2720                                        NULL,
2721                                        NULL,
2722                                /*119*/ NULL,
2723                                        NULL,
2724                                        MatMultHermitianTranspose_SeqBAIJ,
2725                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2726                                        NULL,
2727                                /*124*/ NULL,
2728                                        NULL,
2729                                        MatInvertBlockDiagonal_SeqBAIJ,
2730                                        NULL,
2731                                        NULL,
2732                                /*129*/ NULL,
2733                                        NULL,
2734                                        NULL,
2735                                        NULL,
2736                                        NULL,
2737                                /*134*/ NULL,
2738                                        NULL,
2739                                        NULL,
2740                                        NULL,
2741                                        NULL,
2742                                /*139*/ MatSetBlockSizes_Default,
2743                                        NULL,
2744                                        NULL,
2745                                        MatFDColoringSetUp_SeqXAIJ,
2746                                        NULL,
2747                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2748                                        MatDestroySubMatrices_SeqBAIJ
2749 };
2750 
2751 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2752 {
2753   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2754   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2759 
2760   /* allocate space for values if not already there */
2761   if (!aij->saved_values) {
2762     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2763     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2764   }
2765 
2766   /* copy values over */
2767   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2772 {
2773   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2774   PetscErrorCode ierr;
2775   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2776 
2777   PetscFunctionBegin;
2778   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2779   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2780 
2781   /* copy values over */
2782   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2787 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2788 
2789 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2790 {
2791   Mat_SeqBAIJ    *b;
2792   PetscErrorCode ierr;
2793   PetscInt       i,mbs,nbs,bs2;
2794   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2795 
2796   PetscFunctionBegin;
2797   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2798   if (nz == MAT_SKIP_ALLOCATION) {
2799     skipallocation = PETSC_TRUE;
2800     nz             = 0;
2801   }
2802 
2803   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2807 
2808   B->preallocated = PETSC_TRUE;
2809 
2810   mbs = B->rmap->n/bs;
2811   nbs = B->cmap->n/bs;
2812   bs2 = bs*bs;
2813 
2814   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2815 
2816   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2817   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2818   if (nnz) {
2819     for (i=0; i<mbs; i++) {
2820       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2821       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2822     }
2823   }
2824 
2825   b    = (Mat_SeqBAIJ*)B->data;
2826   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2827   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2828   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2829 
2830   if (!flg) {
2831     switch (bs) {
2832     case 1:
2833       B->ops->mult    = MatMult_SeqBAIJ_1;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2835       break;
2836     case 2:
2837       B->ops->mult    = MatMult_SeqBAIJ_2;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2839       break;
2840     case 3:
2841       B->ops->mult    = MatMult_SeqBAIJ_3;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2843       break;
2844     case 4:
2845       B->ops->mult    = MatMult_SeqBAIJ_4;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2847       break;
2848     case 5:
2849       B->ops->mult    = MatMult_SeqBAIJ_5;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2851       break;
2852     case 6:
2853       B->ops->mult    = MatMult_SeqBAIJ_6;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2855       break;
2856     case 7:
2857       B->ops->mult    = MatMult_SeqBAIJ_7;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2859       break;
2860     case 9:
2861     {
2862       PetscInt version = 1;
2863       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2864       switch (version) {
2865 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2866       case 1:
2867         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2868         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2869         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2870         break;
2871 #endif
2872       default:
2873         B->ops->mult    = MatMult_SeqBAIJ_N;
2874         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2875         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2876         break;
2877       }
2878       break;
2879     }
2880     case 11:
2881       B->ops->mult    = MatMult_SeqBAIJ_11;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2883       break;
2884     case 12:
2885     {
2886       PetscInt version = 1;
2887       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2888       switch (version) {
2889       case 1:
2890         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2891         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2892         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2893         break;
2894       case 2:
2895         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2896         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2897         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2898         break;
2899 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2900       case 3:
2901         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2902         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2903         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);
2904         break;
2905 #endif
2906       default:
2907         B->ops->mult    = MatMult_SeqBAIJ_N;
2908         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2909         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2910         break;
2911       }
2912       break;
2913     }
2914     case 15:
2915     {
2916       PetscInt version = 1;
2917       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2918       switch (version) {
2919       case 1:
2920         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2921         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2922         break;
2923       case 2:
2924         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2925         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2926         break;
2927       case 3:
2928         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2929         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2930         break;
2931       case 4:
2932         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2933         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);
2934         break;
2935       default:
2936         B->ops->mult    = MatMult_SeqBAIJ_N;
2937         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2938         break;
2939       }
2940       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2941       break;
2942     }
2943     default:
2944       B->ops->mult    = MatMult_SeqBAIJ_N;
2945       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2946       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);
2947       break;
2948     }
2949   }
2950   B->ops->sor = MatSOR_SeqBAIJ;
2951   b->mbs = mbs;
2952   b->nbs = nbs;
2953   if (!skipallocation) {
2954     if (!b->imax) {
2955       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2956       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2957 
2958       b->free_imax_ilen = PETSC_TRUE;
2959     }
2960     /* b->ilen will count nonzeros in each block row so far. */
2961     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2962     if (!nnz) {
2963       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2964       else if (nz < 0) nz = 1;
2965       nz = PetscMin(nz,nbs);
2966       for (i=0; i<mbs; i++) b->imax[i] = nz;
2967       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
2968     } else {
2969       PetscInt64 nz64 = 0;
2970       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2971       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2972     }
2973 
2974     /* allocate the matrix space */
2975     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2976     if (B->structure_only) {
2977       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2978       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2979       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2980     } else {
2981       PetscInt nzbs2 = 0;
2982       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
2983       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2984       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2985       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2986     }
2987     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2988 
2989     if (B->structure_only) {
2990       b->singlemalloc = PETSC_FALSE;
2991       b->free_a       = PETSC_FALSE;
2992     } else {
2993       b->singlemalloc = PETSC_TRUE;
2994       b->free_a       = PETSC_TRUE;
2995     }
2996     b->free_ij = PETSC_TRUE;
2997 
2998     b->i[0] = 0;
2999     for (i=1; i<mbs+1; i++) {
3000       b->i[i] = b->i[i-1] + b->imax[i-1];
3001     }
3002 
3003   } else {
3004     b->free_a  = PETSC_FALSE;
3005     b->free_ij = PETSC_FALSE;
3006   }
3007 
3008   b->bs2              = bs2;
3009   b->mbs              = mbs;
3010   b->nz               = 0;
3011   b->maxnz            = nz;
3012   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3013   B->was_assembled    = PETSC_FALSE;
3014   B->assembled        = PETSC_FALSE;
3015   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3016   PetscFunctionReturn(0);
3017 }
3018 
3019 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3020 {
3021   PetscInt       i,m,nz,nz_max=0,*nnz;
3022   PetscScalar    *values=NULL;
3023   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3024   PetscErrorCode ierr;
3025 
3026   PetscFunctionBegin;
3027   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3028   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3029   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3030   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3031   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3032   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3033   m    = B->rmap->n/bs;
3034 
3035   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3036   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3037   for (i=0; i<m; i++) {
3038     nz = ii[i+1]- ii[i];
3039     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3040     nz_max = PetscMax(nz_max, nz);
3041     nnz[i] = nz;
3042   }
3043   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3044   ierr = PetscFree(nnz);CHKERRQ(ierr);
3045 
3046   values = (PetscScalar*)V;
3047   if (!values) {
3048     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3049   }
3050   for (i=0; i<m; i++) {
3051     PetscInt          ncols  = ii[i+1] - ii[i];
3052     const PetscInt    *icols = jj + ii[i];
3053     if (bs == 1 || !roworiented) {
3054       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3055       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3056     } else {
3057       PetscInt j;
3058       for (j=0; j<ncols; j++) {
3059         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3060         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3061       }
3062     }
3063   }
3064   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3065   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3066   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3067   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 /*@C
3072    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3073 
3074    Not Collective
3075 
3076    Input Parameter:
3077 .  mat - a MATSEQBAIJ matrix
3078 
3079    Output Parameter:
3080 .   array - pointer to the data
3081 
3082    Level: intermediate
3083 
3084 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3085 @*/
3086 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3087 {
3088   PetscErrorCode ierr;
3089 
3090   PetscFunctionBegin;
3091   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3092   PetscFunctionReturn(0);
3093 }
3094 
3095 /*@C
3096    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3097 
3098    Not Collective
3099 
3100    Input Parameters:
3101 +  mat - a MATSEQBAIJ matrix
3102 -  array - pointer to the data
3103 
3104    Level: intermediate
3105 
3106 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3107 @*/
3108 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3109 {
3110   PetscErrorCode ierr;
3111 
3112   PetscFunctionBegin;
3113   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3114   PetscFunctionReturn(0);
3115 }
3116 
3117 /*MC
3118    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3119    block sparse compressed row format.
3120 
3121    Options Database Keys:
3122 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3123 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3124 
3125    Level: beginner
3126 
3127    Notes:
3128     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3129     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3130 
3131    Run with -info to see what version of the matrix-vector product is being used
3132 
3133 .seealso: MatCreateSeqBAIJ()
3134 M*/
3135 
3136 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3137 
3138 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3139 {
3140   PetscErrorCode ierr;
3141   PetscMPIInt    size;
3142   Mat_SeqBAIJ    *b;
3143 
3144   PetscFunctionBegin;
3145   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3146   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3147 
3148   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3149   B->data = (void*)b;
3150   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3151 
3152   b->row          = NULL;
3153   b->col          = NULL;
3154   b->icol         = NULL;
3155   b->reallocs     = 0;
3156   b->saved_values = NULL;
3157 
3158   b->roworiented        = PETSC_TRUE;
3159   b->nonew              = 0;
3160   b->diag               = NULL;
3161   B->spptr              = NULL;
3162   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3163   b->keepnonzeropattern = PETSC_FALSE;
3164 
3165   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3166   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3167   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3168   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3169   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3170   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3172   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3173   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3175   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3176 #if defined(PETSC_HAVE_HYPRE)
3177   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3178 #endif
3179   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3180   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3181   PetscFunctionReturn(0);
3182 }
3183 
3184 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3185 {
3186   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3187   PetscErrorCode ierr;
3188   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3189 
3190   PetscFunctionBegin;
3191   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3192 
3193   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3194     c->imax           = a->imax;
3195     c->ilen           = a->ilen;
3196     c->free_imax_ilen = PETSC_FALSE;
3197   } else {
3198     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3199     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3200     for (i=0; i<mbs; i++) {
3201       c->imax[i] = a->imax[i];
3202       c->ilen[i] = a->ilen[i];
3203     }
3204     c->free_imax_ilen = PETSC_TRUE;
3205   }
3206 
3207   /* allocate the matrix space */
3208   if (mallocmatspace) {
3209     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3210       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3211       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3212 
3213       c->i            = a->i;
3214       c->j            = a->j;
3215       c->singlemalloc = PETSC_FALSE;
3216       c->free_a       = PETSC_TRUE;
3217       c->free_ij      = PETSC_FALSE;
3218       c->parent       = A;
3219       C->preallocated = PETSC_TRUE;
3220       C->assembled    = PETSC_TRUE;
3221 
3222       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3223       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3224       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3225     } else {
3226       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3227       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3228 
3229       c->singlemalloc = PETSC_TRUE;
3230       c->free_a       = PETSC_TRUE;
3231       c->free_ij      = PETSC_TRUE;
3232 
3233       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3234       if (mbs > 0) {
3235         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3236         if (cpvalues == MAT_COPY_VALUES) {
3237           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3238         } else {
3239           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3240         }
3241       }
3242       C->preallocated = PETSC_TRUE;
3243       C->assembled    = PETSC_TRUE;
3244     }
3245   }
3246 
3247   c->roworiented = a->roworiented;
3248   c->nonew       = a->nonew;
3249 
3250   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3251   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3252 
3253   c->bs2         = a->bs2;
3254   c->mbs         = a->mbs;
3255   c->nbs         = a->nbs;
3256 
3257   if (a->diag) {
3258     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3259       c->diag      = a->diag;
3260       c->free_diag = PETSC_FALSE;
3261     } else {
3262       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3263       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3264       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3265       c->free_diag = PETSC_TRUE;
3266     }
3267   } else c->diag = NULL;
3268 
3269   c->nz         = a->nz;
3270   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3271   c->solve_work = NULL;
3272   c->mult_work  = NULL;
3273   c->sor_workt  = NULL;
3274   c->sor_work   = NULL;
3275 
3276   c->compressedrow.use   = a->compressedrow.use;
3277   c->compressedrow.nrows = a->compressedrow.nrows;
3278   if (a->compressedrow.use) {
3279     i    = a->compressedrow.nrows;
3280     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3281     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3282     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3283     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3284   } else {
3285     c->compressedrow.use    = PETSC_FALSE;
3286     c->compressedrow.i      = NULL;
3287     c->compressedrow.rindex = NULL;
3288   }
3289   C->nonzerostate = A->nonzerostate;
3290 
3291   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3292   PetscFunctionReturn(0);
3293 }
3294 
3295 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3296 {
3297   PetscErrorCode ierr;
3298 
3299   PetscFunctionBegin;
3300   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3301   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3302   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3303   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3304   PetscFunctionReturn(0);
3305 }
3306 
3307 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3308 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3309 {
3310   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3311   PetscInt       *rowidxs,*colidxs;
3312   PetscScalar    *matvals;
3313   PetscErrorCode ierr;
3314 
3315   PetscFunctionBegin;
3316   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3317 
3318   /* read matrix header */
3319   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3320   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3321   M = header[1]; N = header[2]; nz = header[3];
3322   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3323   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3324   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3325 
3326   /* set block sizes from the viewer's .info file */
3327   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3328   /* set local and global sizes if not set already */
3329   if (mat->rmap->n < 0) mat->rmap->n = M;
3330   if (mat->cmap->n < 0) mat->cmap->n = N;
3331   if (mat->rmap->N < 0) mat->rmap->N = M;
3332   if (mat->cmap->N < 0) mat->cmap->N = N;
3333   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3334   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3335 
3336   /* check if the matrix sizes are correct */
3337   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3338   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3339   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3340   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3341   mbs = m/bs; nbs = n/bs;
3342 
3343   /* read in row lengths, column indices and nonzero values */
3344   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3345   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3346   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3347   sum = rowidxs[m];
3348   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3349 
3350   /* read in column indices and nonzero values */
3351   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3352   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3353   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3354 
3355   { /* preallocate matrix storage */
3356     PetscBT   bt; /* helper bit set to count nonzeros */
3357     PetscInt  *nnz;
3358     PetscBool sbaij;
3359 
3360     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3361     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3362     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3363     for (i=0; i<mbs; i++) {
3364       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3365       for (k=0; k<bs; k++) {
3366         PetscInt row = bs*i + k;
3367         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3368           PetscInt col = colidxs[j];
3369           if (!sbaij || col >= row)
3370             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3371         }
3372       }
3373     }
3374     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3375     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3376     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3377     ierr = PetscFree(nnz);CHKERRQ(ierr);
3378   }
3379 
3380   /* store matrix values */
3381   for (i=0; i<m; i++) {
3382     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3383     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3384   }
3385 
3386   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3387   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3388   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3389   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3390   PetscFunctionReturn(0);
3391 }
3392 
3393 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3394 {
3395   PetscErrorCode ierr;
3396   PetscBool      isbinary;
3397 
3398   PetscFunctionBegin;
3399   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3400   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3401   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3402   PetscFunctionReturn(0);
3403 }
3404 
3405 /*@C
3406    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3407    compressed row) format.  For good matrix assembly performance the
3408    user should preallocate the matrix storage by setting the parameter nz
3409    (or the array nnz).  By setting these parameters accurately, performance
3410    during matrix assembly can be increased by more than a factor of 50.
3411 
3412    Collective
3413 
3414    Input Parameters:
3415 +  comm - MPI communicator, set to PETSC_COMM_SELF
3416 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3417           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3418 .  m - number of rows
3419 .  n - number of columns
3420 .  nz - number of nonzero blocks  per block row (same for all rows)
3421 -  nnz - array containing the number of nonzero blocks in the various block rows
3422          (possibly different for each block row) or NULL
3423 
3424    Output Parameter:
3425 .  A - the matrix
3426 
3427    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3428    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3429    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3430 
3431    Options Database Keys:
3432 +   -mat_no_unroll - uses code that does not unroll the loops in the
3433                      block calculations (much slower)
3434 -    -mat_block_size - size of the blocks to use
3435 
3436    Level: intermediate
3437 
3438    Notes:
3439    The number of rows and columns must be divisible by blocksize.
3440 
3441    If the nnz parameter is given then the nz parameter is ignored
3442 
3443    A nonzero block is any block that as 1 or more nonzeros in it
3444 
3445    The block AIJ format is fully compatible with standard Fortran 77
3446    storage.  That is, the stored row and column indices can begin at
3447    either one (as in Fortran) or zero.  See the users' manual for details.
3448 
3449    Specify the preallocated storage with either nz or nnz (not both).
3450    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3451    allocation.  See Users-Manual: ch_mat for details.
3452    matrices.
3453 
3454 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3455 @*/
3456 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3457 {
3458   PetscErrorCode ierr;
3459 
3460   PetscFunctionBegin;
3461   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3462   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3463   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3464   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3465   PetscFunctionReturn(0);
3466 }
3467 
3468 /*@C
3469    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3470    per row in the matrix. For good matrix assembly performance the
3471    user should preallocate the matrix storage by setting the parameter nz
3472    (or the array nnz).  By setting these parameters accurately, performance
3473    during matrix assembly can be increased by more than a factor of 50.
3474 
3475    Collective
3476 
3477    Input Parameters:
3478 +  B - the matrix
3479 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3480           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3481 .  nz - number of block nonzeros per block row (same for all rows)
3482 -  nnz - array containing the number of block nonzeros in the various block rows
3483          (possibly different for each block row) or NULL
3484 
3485    Options Database Keys:
3486 +   -mat_no_unroll - uses code that does not unroll the loops in the
3487                      block calculations (much slower)
3488 -   -mat_block_size - size of the blocks to use
3489 
3490    Level: intermediate
3491 
3492    Notes:
3493    If the nnz parameter is given then the nz parameter is ignored
3494 
3495    You can call MatGetInfo() to get information on how effective the preallocation was;
3496    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3497    You can also run with the option -info and look for messages with the string
3498    malloc in them to see if additional memory allocation was needed.
3499 
3500    The block AIJ format is fully compatible with standard Fortran 77
3501    storage.  That is, the stored row and column indices can begin at
3502    either one (as in Fortran) or zero.  See the users' manual for details.
3503 
3504    Specify the preallocated storage with either nz or nnz (not both).
3505    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3506    allocation.  See Users-Manual: ch_mat for details.
3507 
3508 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3509 @*/
3510 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3511 {
3512   PetscErrorCode ierr;
3513 
3514   PetscFunctionBegin;
3515   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3516   PetscValidType(B,1);
3517   PetscValidLogicalCollectiveInt(B,bs,2);
3518   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3519   PetscFunctionReturn(0);
3520 }
3521 
3522 /*@C
3523    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3524 
3525    Collective
3526 
3527    Input Parameters:
3528 +  B - the matrix
3529 .  i - the indices into j for the start of each local row (starts with zero)
3530 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3531 -  v - optional values in the matrix
3532 
3533    Level: advanced
3534 
3535    Notes:
3536    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3537    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3538    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3539    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3540    block column and the second index is over columns within a block.
3541 
3542    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3543 
3544 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3545 @*/
3546 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3547 {
3548   PetscErrorCode ierr;
3549 
3550   PetscFunctionBegin;
3551   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3552   PetscValidType(B,1);
3553   PetscValidLogicalCollectiveInt(B,bs,2);
3554   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3555   PetscFunctionReturn(0);
3556 }
3557 
3558 
3559 /*@
3560      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3561 
3562      Collective
3563 
3564    Input Parameters:
3565 +  comm - must be an MPI communicator of size 1
3566 .  bs - size of block
3567 .  m - number of rows
3568 .  n - number of columns
3569 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3570 .  j - column indices
3571 -  a - matrix values
3572 
3573    Output Parameter:
3574 .  mat - the matrix
3575 
3576    Level: advanced
3577 
3578    Notes:
3579        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3580     once the matrix is destroyed
3581 
3582        You cannot set new nonzero locations into this matrix, that will generate an error.
3583 
3584        The i and j indices are 0 based
3585 
3586        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3587 
3588       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3589       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3590       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3591       with column-major ordering within blocks.
3592 
3593 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3594 
3595 @*/
3596 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3597 {
3598   PetscErrorCode ierr;
3599   PetscInt       ii;
3600   Mat_SeqBAIJ    *baij;
3601 
3602   PetscFunctionBegin;
3603   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3604   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3605 
3606   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3607   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3608   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3609   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3610   baij = (Mat_SeqBAIJ*)(*mat)->data;
3611   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3612   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3613 
3614   baij->i = i;
3615   baij->j = j;
3616   baij->a = a;
3617 
3618   baij->singlemalloc = PETSC_FALSE;
3619   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3620   baij->free_a       = PETSC_FALSE;
3621   baij->free_ij      = PETSC_FALSE;
3622 
3623   for (ii=0; ii<m; ii++) {
3624     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3625     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3626   }
3627   if (PetscDefined(USE_DEBUG)) {
3628     for (ii=0; ii<baij->i[m]; ii++) {
3629       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3630       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3631     }
3632   }
3633 
3634   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3635   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3636   PetscFunctionReturn(0);
3637 }
3638 
3639 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3640 {
3641   PetscErrorCode ierr;
3642   PetscMPIInt    size;
3643 
3644   PetscFunctionBegin;
3645   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3646   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3647     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3648   } else {
3649     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3650   }
3651   PetscFunctionReturn(0);
3652 }
3653