xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 285fb4e2b69b3de46a0633bd0adc6a7f684caa1e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
19 
20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21 {
22   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
23   PetscErrorCode ierr;
24   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
25   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
26   PetscReal      shift = 0.0;
27   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
28 
29   PetscFunctionBegin;
30   allowzeropivot = PetscNot(A->erroriffailure);
31 
32   if (a->idiagvalid) {
33     if (values) *values = a->idiag;
34     PetscFunctionReturn(0);
35   }
36   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
37   diag_offset = a->diag;
38   if (!a->idiag) {
39     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
40     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
41   }
42   diag  = a->idiag;
43   if (values) *values = a->idiag;
44   /* factor and invert each block */
45   switch (bs) {
46   case 1:
47     for (i=0; i<mbs; i++) {
48       odiag    = v + 1*diag_offset[i];
49       diag[0]  = odiag[0];
50 
51       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52         if (allowzeropivot) {
53           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
54           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
55           A->factorerror_zeropivot_row   = i;
56           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
57         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58       }
59 
60       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
61       diag    += 1;
62     }
63     break;
64   case 2:
65     for (i=0; i<mbs; i++) {
66       odiag    = v + 4*diag_offset[i];
67       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
69       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70       diag    += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
80       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81       diag    += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
88       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
89       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90       diag  += 16;
91     }
92     break;
93   case 5:
94     for (i=0; i<mbs; i++) {
95       odiag  = v + 25*diag_offset[i];
96       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99       diag  += 25;
100     }
101     break;
102   case 6:
103     for (i=0; i<mbs; i++) {
104       odiag  = v + 36*diag_offset[i];
105       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
107       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108       diag  += 36;
109     }
110     break;
111   case 7:
112     for (i=0; i<mbs; i++) {
113       odiag  = v + 49*diag_offset[i];
114       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 49;
118     }
119     break;
120   default:
121     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122     for (i=0; i<mbs; i++) {
123       odiag  = v + bs2*diag_offset[i];
124       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
125       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127       diag  += bs2;
128     }
129     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136 {
137   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
138   PetscScalar       *x,*work,*w,*workt,*t;
139   const MatScalar   *v,*aa = a->a, *idiag;
140   const PetscScalar *b,*xb;
141   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142   PetscErrorCode    ierr;
143   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
145 
146   PetscFunctionBegin;
147   if (fshift == -1.0) fshift = 0.0; /* negative fshift indicates do not error on zero diagonal; this code never errors on zero diagonal */
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
952       }
953       if (N >= i) {
954         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k]; brow = row/4;
990     rp   = aj + ai[brow];
991     ap   = aa + 16*ai[brow];
992     nrow = ailen[brow];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col   = in[l]; bcol = col/4;
997       ridx  = row % 4; cidx = col % 4;
998       value = v[l + k*n];
999       if (col <= lastcol)  low = 0;
1000       else                high = nrow;
1001       lastcol = col;
1002       while (high-low > 7) {
1003         t = (low+high)/2;
1004         if (rp[t] > bcol) high = t;
1005         else              low  = t;
1006       }
1007       for (i=low; i<high; i++) {
1008         if (rp[i] > bcol) break;
1009         if (rp[i] == bcol) {
1010           bap   = ap +  16*i + 4*cidx + ridx;
1011           *bap += value;
1012           goto noinsert1;
1013         }
1014       }
1015       N = nrow++ - 1;
1016       high++; /* added new column thus must search to one higher than before */
1017       /* shift up all the later entries in this row */
1018       for (ii=N; ii>=i; ii--) {
1019         rp[ii+1] = rp[ii];
1020         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1021       }
1022       if (N>=i) {
1023         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1024       }
1025       rp[i]                    = bcol;
1026       ap[16*i + 4*cidx + ridx] = value;
1027 noinsert1:;
1028       low = i;
1029     }
1030     ailen[brow] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 
1035 /*
1036      Checks for missing diagonals
1037 */
1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1039 {
1040   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042   PetscInt       *diag,*ii = a->i,i;
1043 
1044   PetscFunctionBegin;
1045   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1046   *missing = PETSC_FALSE;
1047   if (A->rmap->n > 0 && !ii) {
1048     *missing = PETSC_TRUE;
1049     if (d) *d = 0;
1050     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1051   } else {
1052     diag = a->diag;
1053     for (i=0; i<a->mbs; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_HYPRE)
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpiaij_hypre_C",NULL);CHKERRQ(ierr);
1226 #endif
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1232 {
1233   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1234   PetscErrorCode ierr;
1235 
1236   PetscFunctionBegin;
1237   switch (op) {
1238   case MAT_ROW_ORIENTED:
1239     a->roworiented = flg;
1240     break;
1241   case MAT_KEEP_NONZERO_PATTERN:
1242     a->keepnonzeropattern = flg;
1243     break;
1244   case MAT_NEW_NONZERO_LOCATIONS:
1245     a->nonew = (flg ? 0 : 1);
1246     break;
1247   case MAT_NEW_NONZERO_LOCATION_ERR:
1248     a->nonew = (flg ? -1 : 0);
1249     break;
1250   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1251     a->nonew = (flg ? -2 : 0);
1252     break;
1253   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1254     a->nounused = (flg ? -1 : 0);
1255     break;
1256   case MAT_NEW_DIAGONALS:
1257   case MAT_IGNORE_OFF_PROC_ENTRIES:
1258   case MAT_USE_HASH_TABLE:
1259     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1260     break;
1261   case MAT_SPD:
1262   case MAT_SYMMETRIC:
1263   case MAT_STRUCTURALLY_SYMMETRIC:
1264   case MAT_HERMITIAN:
1265   case MAT_SYMMETRY_ETERNAL:
1266   case MAT_SUBMAT_SINGLEIS:
1267   case MAT_STRUCTURE_ONLY:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1277 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1278 {
1279   PetscErrorCode ierr;
1280   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1281   MatScalar      *aa_i;
1282   PetscScalar    *v_i;
1283 
1284   PetscFunctionBegin;
1285   bs  = A->rmap->bs;
1286   bs2 = bs*bs;
1287   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1288 
1289   bn  = row/bs;   /* Block number */
1290   bp  = row % bs; /* Block Position */
1291   M   = ai[bn+1] - ai[bn];
1292   *nz = bs*M;
1293 
1294   if (v) {
1295     *v = 0;
1296     if (*nz) {
1297       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1298       for (i=0; i<M; i++) { /* for each block in the block row */
1299         v_i  = *v + i*bs;
1300         aa_i = aa + bs2*(ai[bn] + i);
1301         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1302       }
1303     }
1304   }
1305 
1306   if (idx) {
1307     *idx = 0;
1308     if (*nz) {
1309       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1310       for (i=0; i<M; i++) { /* for each block in the block row */
1311         idx_i = *idx + i*bs;
1312         itmp  = bs*aj[ai[bn] + i];
1313         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1314       }
1315     }
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1321 {
1322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1331 {
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1336   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1341 {
1342   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data;
1343   Mat            C;
1344   PetscErrorCode ierr;
1345   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,len,*col;
1346   PetscInt       *rows,*cols,bs2=a->bs2;
1347   MatScalar      *array;
1348 
1349   PetscFunctionBegin;
1350   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1351     ierr = PetscCalloc1(1+nbs,&col);CHKERRQ(ierr);
1352 
1353     for (i=0; i<ai[mbs]; i++) col[aj[i]] += 1;
1354     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1355     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1356     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1357     ierr = MatSeqBAIJSetPreallocation(C,bs,0,col);CHKERRQ(ierr);
1358     ierr = PetscFree(col);CHKERRQ(ierr);
1359   } else {
1360     C = *B;
1361   }
1362 
1363   array = a->a;
1364   ierr  = PetscMalloc2(bs,&rows,bs,&cols);CHKERRQ(ierr);
1365   for (i=0; i<mbs; i++) {
1366     cols[0] = i*bs;
1367     for (k=1; k<bs; k++) cols[k] = cols[k-1] + 1;
1368     len = ai[i+1] - ai[i];
1369     for (j=0; j<len; j++) {
1370       rows[0] = (*aj++)*bs;
1371       for (k=1; k<bs; k++) rows[k] = rows[k-1] + 1;
1372       ierr   = MatSetValues_SeqBAIJ(C,bs,rows,bs,cols,array,INSERT_VALUES);CHKERRQ(ierr);
1373       array += bs2;
1374     }
1375   }
1376   ierr = PetscFree2(rows,cols);CHKERRQ(ierr);
1377 
1378   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1379   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1380 
1381   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1382     *B = C;
1383   } else {
1384     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1390 {
1391   PetscErrorCode ierr;
1392   Mat            Btrans;
1393 
1394   PetscFunctionBegin;
1395   *f   = PETSC_FALSE;
1396   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1397   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1398   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1403 {
1404   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1405   PetscErrorCode ierr;
1406   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1407   int            fd;
1408   PetscScalar    *aa;
1409   FILE           *file;
1410 
1411   PetscFunctionBegin;
1412   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1413   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1414   col_lens[0] = MAT_FILE_CLASSID;
1415 
1416   col_lens[1] = A->rmap->N;
1417   col_lens[2] = A->cmap->n;
1418   col_lens[3] = a->nz*bs2;
1419 
1420   /* store lengths of each row and write (including header) to file */
1421   count = 0;
1422   for (i=0; i<a->mbs; i++) {
1423     for (j=0; j<bs; j++) {
1424       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1425     }
1426   }
1427   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1428   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1429 
1430   /* store column indices (zero start index) */
1431   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1432   count = 0;
1433   for (i=0; i<a->mbs; i++) {
1434     for (j=0; j<bs; j++) {
1435       for (k=a->i[i]; k<a->i[i+1]; k++) {
1436         for (l=0; l<bs; l++) {
1437           jj[count++] = bs*a->j[k] + l;
1438         }
1439       }
1440     }
1441   }
1442   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1443   ierr = PetscFree(jj);CHKERRQ(ierr);
1444 
1445   /* store nonzero values */
1446   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1447   count = 0;
1448   for (i=0; i<a->mbs; i++) {
1449     for (j=0; j<bs; j++) {
1450       for (k=a->i[i]; k<a->i[i+1]; k++) {
1451         for (l=0; l<bs; l++) {
1452           aa[count++] = a->a[bs2*k + l*bs + j];
1453         }
1454       }
1455     }
1456   }
1457   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1458   ierr = PetscFree(aa);CHKERRQ(ierr);
1459 
1460   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1461   if (file) {
1462     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1463   }
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1468 {
1469   PetscErrorCode ierr;
1470   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1471   PetscInt       i,bs = A->rmap->bs,k;
1472 
1473   PetscFunctionBegin;
1474   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1475   for (i=0; i<a->mbs; i++) {
1476     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1477     for (k=a->i[i]; k<a->i[i+1]; k++) {
1478       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1479     }
1480     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1481   }
1482   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1483   PetscFunctionReturn(0);
1484 }
1485 
1486 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1487 {
1488   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1489   PetscErrorCode    ierr;
1490   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1491   PetscViewerFormat format;
1492 
1493   PetscFunctionBegin;
1494   if (A->structure_only) {
1495     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1496     PetscFunctionReturn(0);
1497   }
1498 
1499   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1500   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1501     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1502   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1503     const char *matname;
1504     Mat        aij;
1505     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1506     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1507     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1508     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1509     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1510   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1511       PetscFunctionReturn(0);
1512   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1513     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1514     for (i=0; i<a->mbs; i++) {
1515       for (j=0; j<bs; j++) {
1516         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1517         for (k=a->i[i]; k<a->i[i+1]; k++) {
1518           for (l=0; l<bs; l++) {
1519 #if defined(PETSC_USE_COMPLEX)
1520             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1521               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1522                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1523             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1524               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1525                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1526             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1527               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1528             }
1529 #else
1530             if (a->a[bs2*k + l*bs + j] != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1532             }
1533 #endif
1534           }
1535         }
1536         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1537       }
1538     }
1539     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1540   } else {
1541     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1542     for (i=0; i<a->mbs; i++) {
1543       for (j=0; j<bs; j++) {
1544         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1545         for (k=a->i[i]; k<a->i[i+1]; k++) {
1546           for (l=0; l<bs; l++) {
1547 #if defined(PETSC_USE_COMPLEX)
1548             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1549               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1550                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1551             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1552               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1553                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1554             } else {
1555               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1556             }
1557 #else
1558             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1559 #endif
1560           }
1561         }
1562         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1563       }
1564     }
1565     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1566   }
1567   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1568   PetscFunctionReturn(0);
1569 }
1570 
1571 #include <petscdraw.h>
1572 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1573 {
1574   Mat               A = (Mat) Aa;
1575   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1576   PetscErrorCode    ierr;
1577   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1578   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1579   MatScalar         *aa;
1580   PetscViewer       viewer;
1581   PetscViewerFormat format;
1582 
1583   PetscFunctionBegin;
1584   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1585   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1586   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1587 
1588   /* loop over matrix elements drawing boxes */
1589 
1590   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1591     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1592     /* Blue for negative, Cyan for zero and  Red for positive */
1593     color = PETSC_DRAW_BLUE;
1594     for (i=0,row=0; i<mbs; i++,row+=bs) {
1595       for (j=a->i[i]; j<a->i[i+1]; j++) {
1596         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1597         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1598         aa  = a->a + j*bs2;
1599         for (k=0; k<bs; k++) {
1600           for (l=0; l<bs; l++) {
1601             if (PetscRealPart(*aa++) >=  0.) continue;
1602             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1603           }
1604         }
1605       }
1606     }
1607     color = PETSC_DRAW_CYAN;
1608     for (i=0,row=0; i<mbs; i++,row+=bs) {
1609       for (j=a->i[i]; j<a->i[i+1]; j++) {
1610         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1611         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1612         aa  = a->a + j*bs2;
1613         for (k=0; k<bs; k++) {
1614           for (l=0; l<bs; l++) {
1615             if (PetscRealPart(*aa++) != 0.) continue;
1616             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1617           }
1618         }
1619       }
1620     }
1621     color = PETSC_DRAW_RED;
1622     for (i=0,row=0; i<mbs; i++,row+=bs) {
1623       for (j=a->i[i]; j<a->i[i+1]; j++) {
1624         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1625         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1626         aa  = a->a + j*bs2;
1627         for (k=0; k<bs; k++) {
1628           for (l=0; l<bs; l++) {
1629             if (PetscRealPart(*aa++) <= 0.) continue;
1630             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1631           }
1632         }
1633       }
1634     }
1635     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1636   } else {
1637     /* use contour shading to indicate magnitude of values */
1638     /* first determine max of all nonzero values */
1639     PetscReal minv = 0.0, maxv = 0.0;
1640     PetscDraw popup;
1641 
1642     for (i=0; i<a->nz*a->bs2; i++) {
1643       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1644     }
1645     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1646     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1647     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1648 
1649     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1650     for (i=0,row=0; i<mbs; i++,row+=bs) {
1651       for (j=a->i[i]; j<a->i[i+1]; j++) {
1652         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1653         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1654         aa  = a->a + j*bs2;
1655         for (k=0; k<bs; k++) {
1656           for (l=0; l<bs; l++) {
1657             MatScalar v = *aa++;
1658             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1659             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1660           }
1661         }
1662       }
1663     }
1664     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1665   }
1666   PetscFunctionReturn(0);
1667 }
1668 
1669 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1670 {
1671   PetscErrorCode ierr;
1672   PetscReal      xl,yl,xr,yr,w,h;
1673   PetscDraw      draw;
1674   PetscBool      isnull;
1675 
1676   PetscFunctionBegin;
1677   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1678   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1679   if (isnull) PetscFunctionReturn(0);
1680 
1681   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1682   xr  += w;          yr += h;        xl = -w;     yl = -h;
1683   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1684   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1685   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1686   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1687   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1688   PetscFunctionReturn(0);
1689 }
1690 
1691 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1692 {
1693   PetscErrorCode ierr;
1694   PetscBool      iascii,isbinary,isdraw;
1695 
1696   PetscFunctionBegin;
1697   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1698   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1699   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1700   if (iascii) {
1701     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1702   } else if (isbinary) {
1703     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1704   } else if (isdraw) {
1705     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1706   } else {
1707     Mat B;
1708     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1709     ierr = MatView(B,viewer);CHKERRQ(ierr);
1710     ierr = MatDestroy(&B);CHKERRQ(ierr);
1711   }
1712   PetscFunctionReturn(0);
1713 }
1714 
1715 
1716 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1717 {
1718   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1719   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1720   PetscInt    *ai = a->i,*ailen = a->ilen;
1721   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1722   MatScalar   *ap,*aa = a->a;
1723 
1724   PetscFunctionBegin;
1725   for (k=0; k<m; k++) { /* loop over rows */
1726     row = im[k]; brow = row/bs;
1727     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1728     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1729     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1730     nrow = ailen[brow];
1731     for (l=0; l<n; l++) { /* loop over columns */
1732       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1733       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1734       col  = in[l];
1735       bcol = col/bs;
1736       cidx = col%bs;
1737       ridx = row%bs;
1738       high = nrow;
1739       low  = 0; /* assume unsorted */
1740       while (high-low > 5) {
1741         t = (low+high)/2;
1742         if (rp[t] > bcol) high = t;
1743         else             low  = t;
1744       }
1745       for (i=low; i<high; i++) {
1746         if (rp[i] > bcol) break;
1747         if (rp[i] == bcol) {
1748           *v++ = ap[bs2*i+bs*cidx+ridx];
1749           goto finished;
1750         }
1751       }
1752       *v++ = 0.0;
1753 finished:;
1754     }
1755   }
1756   PetscFunctionReturn(0);
1757 }
1758 
1759 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1760 {
1761   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1762   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1763   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1764   PetscErrorCode    ierr;
1765   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1766   PetscBool         roworiented=a->roworiented;
1767   const PetscScalar *value     = v;
1768   MatScalar         *ap=NULL,*aa = a->a,*bap;
1769 
1770   PetscFunctionBegin;
1771   if (roworiented) {
1772     stepval = (n-1)*bs;
1773   } else {
1774     stepval = (m-1)*bs;
1775   }
1776   for (k=0; k<m; k++) { /* loop over added rows */
1777     row = im[k];
1778     if (row < 0) continue;
1779 #if defined(PETSC_USE_DEBUG)
1780     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1781 #endif
1782     rp   = aj + ai[row];
1783     if (!A->structure_only) ap = aa + bs2*ai[row];
1784     rmax = imax[row];
1785     nrow = ailen[row];
1786     low  = 0;
1787     high = nrow;
1788     for (l=0; l<n; l++) { /* loop over added columns */
1789       if (in[l] < 0) continue;
1790 #if defined(PETSC_USE_DEBUG)
1791       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1792 #endif
1793       col = in[l];
1794       if (!A->structure_only) {
1795         if (roworiented) {
1796           value = v + (k*(stepval+bs) + l)*bs;
1797         } else {
1798           value = v + (l*(stepval+bs) + k)*bs;
1799         }
1800       }
1801       if (col <= lastcol) low = 0;
1802       else high = nrow;
1803       lastcol = col;
1804       while (high-low > 7) {
1805         t = (low+high)/2;
1806         if (rp[t] > col) high = t;
1807         else             low  = t;
1808       }
1809       for (i=low; i<high; i++) {
1810         if (rp[i] > col) break;
1811         if (rp[i] == col) {
1812           if (A->structure_only) goto noinsert2;
1813           bap = ap +  bs2*i;
1814           if (roworiented) {
1815             if (is == ADD_VALUES) {
1816               for (ii=0; ii<bs; ii++,value+=stepval) {
1817                 for (jj=ii; jj<bs2; jj+=bs) {
1818                   bap[jj] += *value++;
1819                 }
1820               }
1821             } else {
1822               for (ii=0; ii<bs; ii++,value+=stepval) {
1823                 for (jj=ii; jj<bs2; jj+=bs) {
1824                   bap[jj] = *value++;
1825                 }
1826               }
1827             }
1828           } else {
1829             if (is == ADD_VALUES) {
1830               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1831                 for (jj=0; jj<bs; jj++) {
1832                   bap[jj] += value[jj];
1833                 }
1834                 bap += bs;
1835               }
1836             } else {
1837               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1838                 for (jj=0; jj<bs; jj++) {
1839                   bap[jj]  = value[jj];
1840                 }
1841                 bap += bs;
1842               }
1843             }
1844           }
1845           goto noinsert2;
1846         }
1847       }
1848       if (nonew == 1) goto noinsert2;
1849       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1850       if (A->structure_only) {
1851         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1852       } else {
1853         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1854       }
1855       N = nrow++ - 1; high++;
1856       /* shift up all the later entries in this row */
1857       for (ii=N; ii>=i; ii--) {
1858         rp[ii+1] = rp[ii];
1859         if (!A->structure_only) {
1860           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1861         }
1862       }
1863       if (N >= i && !A->structure_only) {
1864         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1865       }
1866 
1867       rp[i] = col;
1868       if (!A->structure_only) {
1869         bap   = ap +  bs2*i;
1870         if (roworiented) {
1871           for (ii=0; ii<bs; ii++,value+=stepval) {
1872             for (jj=ii; jj<bs2; jj+=bs) {
1873               bap[jj] = *value++;
1874             }
1875           }
1876         } else {
1877           for (ii=0; ii<bs; ii++,value+=stepval) {
1878             for (jj=0; jj<bs; jj++) {
1879               *bap++ = *value++;
1880             }
1881           }
1882         }
1883       }
1884 noinsert2:;
1885       low = i;
1886     }
1887     ailen[row] = nrow;
1888   }
1889   PetscFunctionReturn(0);
1890 }
1891 
1892 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1893 {
1894   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1895   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1896   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1897   PetscErrorCode ierr;
1898   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1899   MatScalar      *aa  = a->a,*ap;
1900   PetscReal      ratio=0.6;
1901 
1902   PetscFunctionBegin;
1903   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1904 
1905   if (m) rmax = ailen[0];
1906   for (i=1; i<mbs; i++) {
1907     /* move each row back by the amount of empty slots (fshift) before it*/
1908     fshift += imax[i-1] - ailen[i-1];
1909     rmax    = PetscMax(rmax,ailen[i]);
1910     if (fshift) {
1911       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1912       N  = ailen[i];
1913       for (j=0; j<N; j++) {
1914         ip[j-fshift] = ip[j];
1915         if (!A->structure_only) {
1916           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1917         }
1918       }
1919     }
1920     ai[i] = ai[i-1] + ailen[i-1];
1921   }
1922   if (mbs) {
1923     fshift += imax[mbs-1] - ailen[mbs-1];
1924     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1925   }
1926 
1927   /* reset ilen and imax for each row */
1928   a->nonzerorowcnt = 0;
1929   if (A->structure_only) {
1930     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1931   } else { /* !A->structure_only */
1932     for (i=0; i<mbs; i++) {
1933       ailen[i] = imax[i] = ai[i+1] - ai[i];
1934       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1935     }
1936   }
1937   a->nz = ai[mbs];
1938 
1939   /* diagonals may have moved, so kill the diagonal pointers */
1940   a->idiagvalid = PETSC_FALSE;
1941   if (fshift && a->diag) {
1942     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1943     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1944     a->diag = 0;
1945   }
1946   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1947   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1948   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1949   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1950 
1951   A->info.mallocs    += a->reallocs;
1952   a->reallocs         = 0;
1953   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1954   a->rmax             = rmax;
1955 
1956   if (!A->structure_only) {
1957     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1958   }
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 /*
1963    This function returns an array of flags which indicate the locations of contiguous
1964    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1965    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1966    Assume: sizes should be long enough to hold all the values.
1967 */
1968 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1969 {
1970   PetscInt  i,j,k,row;
1971   PetscBool flg;
1972 
1973   PetscFunctionBegin;
1974   for (i=0,j=0; i<n; j++) {
1975     row = idx[i];
1976     if (row%bs!=0) { /* Not the begining of a block */
1977       sizes[j] = 1;
1978       i++;
1979     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1980       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1981       i++;
1982     } else { /* Begining of the block, so check if the complete block exists */
1983       flg = PETSC_TRUE;
1984       for (k=1; k<bs; k++) {
1985         if (row+k != idx[i+k]) { /* break in the block */
1986           flg = PETSC_FALSE;
1987           break;
1988         }
1989       }
1990       if (flg) { /* No break in the bs */
1991         sizes[j] = bs;
1992         i       += bs;
1993       } else {
1994         sizes[j] = 1;
1995         i++;
1996       }
1997     }
1998   }
1999   *bs_max = j;
2000   PetscFunctionReturn(0);
2001 }
2002 
2003 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2004 {
2005   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2006   PetscErrorCode    ierr;
2007   PetscInt          i,j,k,count,*rows;
2008   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2009   PetscScalar       zero = 0.0;
2010   MatScalar         *aa;
2011   const PetscScalar *xx;
2012   PetscScalar       *bb;
2013 
2014   PetscFunctionBegin;
2015   /* fix right hand side if needed */
2016   if (x && b) {
2017     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2018     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2019     for (i=0; i<is_n; i++) {
2020       bb[is_idx[i]] = diag*xx[is_idx[i]];
2021     }
2022     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2023     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2024   }
2025 
2026   /* Make a copy of the IS and  sort it */
2027   /* allocate memory for rows,sizes */
2028   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2029 
2030   /* copy IS values to rows, and sort them */
2031   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2032   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2033 
2034   if (baij->keepnonzeropattern) {
2035     for (i=0; i<is_n; i++) sizes[i] = 1;
2036     bs_max          = is_n;
2037   } else {
2038     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2039     A->nonzerostate++;
2040   }
2041 
2042   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2043     row = rows[j];
2044     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2045     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2046     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2047     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2048       if (diag != (PetscScalar)0.0) {
2049         if (baij->ilen[row/bs] > 0) {
2050           baij->ilen[row/bs]       = 1;
2051           baij->j[baij->i[row/bs]] = row/bs;
2052 
2053           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2054         }
2055         /* Now insert all the diagonal values for this bs */
2056         for (k=0; k<bs; k++) {
2057           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2058         }
2059       } else { /* (diag == 0.0) */
2060         baij->ilen[row/bs] = 0;
2061       } /* end (diag == 0.0) */
2062     } else { /* (sizes[i] != bs) */
2063 #if defined(PETSC_USE_DEBUG)
2064       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2065 #endif
2066       for (k=0; k<count; k++) {
2067         aa[0] =  zero;
2068         aa   += bs;
2069       }
2070       if (diag != (PetscScalar)0.0) {
2071         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072       }
2073     }
2074   }
2075 
2076   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2077   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2078   PetscFunctionReturn(0);
2079 }
2080 
2081 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2082 {
2083   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2084   PetscErrorCode    ierr;
2085   PetscInt          i,j,k,count;
2086   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2087   PetscScalar       zero = 0.0;
2088   MatScalar         *aa;
2089   const PetscScalar *xx;
2090   PetscScalar       *bb;
2091   PetscBool         *zeroed,vecs = PETSC_FALSE;
2092 
2093   PetscFunctionBegin;
2094   /* fix right hand side if needed */
2095   if (x && b) {
2096     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2097     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2098     vecs = PETSC_TRUE;
2099   }
2100 
2101   /* zero the columns */
2102   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2103   for (i=0; i<is_n; i++) {
2104     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2105     zeroed[is_idx[i]] = PETSC_TRUE;
2106   }
2107   for (i=0; i<A->rmap->N; i++) {
2108     if (!zeroed[i]) {
2109       row = i/bs;
2110       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2111         for (k=0; k<bs; k++) {
2112           col = bs*baij->j[j] + k;
2113           if (zeroed[col]) {
2114             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2115             if (vecs) bb[i] -= aa[0]*xx[col];
2116             aa[0] = 0.0;
2117           }
2118         }
2119       }
2120     } else if (vecs) bb[i] = diag*xx[i];
2121   }
2122   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2123   if (vecs) {
2124     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2125     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2126   }
2127 
2128   /* zero the rows */
2129   for (i=0; i<is_n; i++) {
2130     row   = is_idx[i];
2131     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2132     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2133     for (k=0; k<count; k++) {
2134       aa[0] =  zero;
2135       aa   += bs;
2136     }
2137     if (diag != (PetscScalar)0.0) {
2138       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2139     }
2140   }
2141   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2146 {
2147   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2148   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2149   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2150   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2151   PetscErrorCode ierr;
2152   PetscInt       ridx,cidx,bs2=a->bs2;
2153   PetscBool      roworiented=a->roworiented;
2154   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2155 
2156   PetscFunctionBegin;
2157   for (k=0; k<m; k++) { /* loop over added rows */
2158     row  = im[k];
2159     brow = row/bs;
2160     if (row < 0) continue;
2161 #if defined(PETSC_USE_DEBUG)
2162     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2163 #endif
2164     rp   = aj + ai[brow];
2165     if (!A->structure_only) ap = aa + bs2*ai[brow];
2166     rmax = imax[brow];
2167     nrow = ailen[brow];
2168     low  = 0;
2169     high = nrow;
2170     for (l=0; l<n; l++) { /* loop over added columns */
2171       if (in[l] < 0) continue;
2172 #if defined(PETSC_USE_DEBUG)
2173       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2174 #endif
2175       col  = in[l]; bcol = col/bs;
2176       ridx = row % bs; cidx = col % bs;
2177       if (!A->structure_only) {
2178         if (roworiented) {
2179           value = v[l + k*n];
2180         } else {
2181           value = v[k + l*m];
2182         }
2183       }
2184       if (col <= lastcol) low = 0; else high = nrow;
2185       lastcol = col;
2186       while (high-low > 7) {
2187         t = (low+high)/2;
2188         if (rp[t] > bcol) high = t;
2189         else              low  = t;
2190       }
2191       for (i=low; i<high; i++) {
2192         if (rp[i] > bcol) break;
2193         if (rp[i] == bcol) {
2194           bap = ap +  bs2*i + bs*cidx + ridx;
2195           if (!A->structure_only) {
2196             if (is == ADD_VALUES) *bap += value;
2197             else                  *bap  = value;
2198           }
2199           goto noinsert1;
2200         }
2201       }
2202       if (nonew == 1) goto noinsert1;
2203       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2204       if (A->structure_only) {
2205         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2206       } else {
2207         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2208       }
2209       N = nrow++ - 1; high++;
2210       /* shift up all the later entries in this row */
2211       for (ii=N; ii>=i; ii--) {
2212         rp[ii+1] = rp[ii];
2213         if (!A->structure_only) {
2214           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2215         }
2216       }
2217       if (N>=i && !A->structure_only) {
2218         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2219       }
2220       rp[i]                      = bcol;
2221       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2222       a->nz++;
2223       A->nonzerostate++;
2224 noinsert1:;
2225       low = i;
2226     }
2227     ailen[brow] = nrow;
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2233 {
2234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2235   Mat            outA;
2236   PetscErrorCode ierr;
2237   PetscBool      row_identity,col_identity;
2238 
2239   PetscFunctionBegin;
2240   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2241   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2242   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2243   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2244 
2245   outA            = inA;
2246   inA->factortype = MAT_FACTOR_LU;
2247   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2248   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2249 
2250   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2251 
2252   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2254   a->row = row;
2255   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2256   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2257   a->col = col;
2258 
2259   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2260   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2261   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2262   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2263 
2264   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2265   if (!a->solve_work) {
2266     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2267     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2268   }
2269   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2274 {
2275   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2276   PetscInt    i,nz,mbs;
2277 
2278   PetscFunctionBegin;
2279   nz  = baij->maxnz;
2280   mbs = baij->mbs;
2281   for (i=0; i<nz; i++) {
2282     baij->j[i] = indices[i];
2283   }
2284   baij->nz = nz;
2285   for (i=0; i<mbs; i++) {
2286     baij->ilen[i] = baij->imax[i];
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 /*@
2292     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2293        in the matrix.
2294 
2295   Input Parameters:
2296 +  mat - the SeqBAIJ matrix
2297 -  indices - the column indices
2298 
2299   Level: advanced
2300 
2301   Notes:
2302     This can be called if you have precomputed the nonzero structure of the
2303   matrix and want to provide it to the matrix object to improve the performance
2304   of the MatSetValues() operation.
2305 
2306     You MUST have set the correct numbers of nonzeros per row in the call to
2307   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2308 
2309     MUST be called before any calls to MatSetValues();
2310 
2311 @*/
2312 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2313 {
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2318   PetscValidPointer(indices,2);
2319   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2324 {
2325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2328   PetscReal      atmp;
2329   PetscScalar    *x,zero = 0.0;
2330   MatScalar      *aa;
2331   PetscInt       ncols,brow,krow,kcol;
2332 
2333   PetscFunctionBegin;
2334   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2335   bs  = A->rmap->bs;
2336   aa  = a->a;
2337   ai  = a->i;
2338   aj  = a->j;
2339   mbs = a->mbs;
2340 
2341   ierr = VecSet(v,zero);CHKERRQ(ierr);
2342   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2343   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2344   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2345   for (i=0; i<mbs; i++) {
2346     ncols = ai[1] - ai[0]; ai++;
2347     brow  = bs*i;
2348     for (j=0; j<ncols; j++) {
2349       for (kcol=0; kcol<bs; kcol++) {
2350         for (krow=0; krow<bs; krow++) {
2351           atmp = PetscAbsScalar(*aa);aa++;
2352           row  = brow + krow;   /* row index */
2353           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2354         }
2355       }
2356       aj++;
2357     }
2358   }
2359   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2364 {
2365   PetscErrorCode ierr;
2366 
2367   PetscFunctionBegin;
2368   /* If the two matrices have the same copy implementation, use fast copy. */
2369   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2370     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2371     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2372     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2373 
2374     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2375     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2376     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2377     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2378   } else {
2379     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2380   }
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2385 {
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2394 {
2395   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2396 
2397   PetscFunctionBegin;
2398   *array = a->a;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2403 {
2404   PetscFunctionBegin;
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2409 {
2410   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2411   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2412   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2413   PetscErrorCode ierr;
2414 
2415   PetscFunctionBegin;
2416   /* Set the number of nonzeros in the new matrix */
2417   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2422 {
2423   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2424   PetscErrorCode ierr;
2425   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2426   PetscBLASInt   one=1;
2427 
2428   PetscFunctionBegin;
2429   if (str == SAME_NONZERO_PATTERN) {
2430     PetscScalar  alpha = a;
2431     PetscBLASInt bnz;
2432     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2433     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2434     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2435   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2436     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2437   } else {
2438     Mat      B;
2439     PetscInt *nnz;
2440     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2441     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2442     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2443     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2444     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2445     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2446     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2447     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2448     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2449     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2450     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2451     ierr = PetscFree(nnz);CHKERRQ(ierr);
2452   }
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2457 {
2458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2459   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2460   MatScalar   *aa = a->a;
2461 
2462   PetscFunctionBegin;
2463   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2468 {
2469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2470   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2471   MatScalar   *aa = a->a;
2472 
2473   PetscFunctionBegin;
2474   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 /*
2479     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2480 */
2481 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2482 {
2483   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2484   PetscErrorCode ierr;
2485   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2486   PetscInt       nz = a->i[m],row,*jj,mr,col;
2487 
2488   PetscFunctionBegin;
2489   *nn = n;
2490   if (!ia) PetscFunctionReturn(0);
2491   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2492   else {
2493     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2494     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2495     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2496     jj   = a->j;
2497     for (i=0; i<nz; i++) {
2498       collengths[jj[i]]++;
2499     }
2500     cia[0] = oshift;
2501     for (i=0; i<n; i++) {
2502       cia[i+1] = cia[i] + collengths[i];
2503     }
2504     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2505     jj   = a->j;
2506     for (row=0; row<m; row++) {
2507       mr = a->i[row+1] - a->i[row];
2508       for (i=0; i<mr; i++) {
2509         col = *jj++;
2510 
2511         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2512       }
2513     }
2514     ierr = PetscFree(collengths);CHKERRQ(ierr);
2515     *ia  = cia; *ja = cja;
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2521 {
2522   PetscErrorCode ierr;
2523 
2524   PetscFunctionBegin;
2525   if (!ia) PetscFunctionReturn(0);
2526   ierr = PetscFree(*ia);CHKERRQ(ierr);
2527   ierr = PetscFree(*ja);CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 /*
2532  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2533  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2534  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2535  */
2536 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2537 {
2538   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2539   PetscErrorCode ierr;
2540   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2541   PetscInt       nz = a->i[m],row,*jj,mr,col;
2542   PetscInt       *cspidx;
2543 
2544   PetscFunctionBegin;
2545   *nn = n;
2546   if (!ia) PetscFunctionReturn(0);
2547 
2548   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2550   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2551   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2552   jj   = a->j;
2553   for (i=0; i<nz; i++) {
2554     collengths[jj[i]]++;
2555   }
2556   cia[0] = oshift;
2557   for (i=0; i<n; i++) {
2558     cia[i+1] = cia[i] + collengths[i];
2559   }
2560   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2561   jj   = a->j;
2562   for (row=0; row<m; row++) {
2563     mr = a->i[row+1] - a->i[row];
2564     for (i=0; i<mr; i++) {
2565       col = *jj++;
2566       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2567       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2568     }
2569   }
2570   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2571   *ia    = cia; *ja = cja;
2572   *spidx = cspidx;
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2577 {
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2582   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2587 {
2588   PetscErrorCode ierr;
2589   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2590 
2591   PetscFunctionBegin;
2592   if (!Y->preallocated || !aij->nz) {
2593     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2594   }
2595   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 /* -------------------------------------------------------------------*/
2600 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2601                                        MatGetRow_SeqBAIJ,
2602                                        MatRestoreRow_SeqBAIJ,
2603                                        MatMult_SeqBAIJ_N,
2604                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2605                                        MatMultTranspose_SeqBAIJ,
2606                                        MatMultTransposeAdd_SeqBAIJ,
2607                                        0,
2608                                        0,
2609                                        0,
2610                                /* 10*/ 0,
2611                                        MatLUFactor_SeqBAIJ,
2612                                        0,
2613                                        0,
2614                                        MatTranspose_SeqBAIJ,
2615                                /* 15*/ MatGetInfo_SeqBAIJ,
2616                                        MatEqual_SeqBAIJ,
2617                                        MatGetDiagonal_SeqBAIJ,
2618                                        MatDiagonalScale_SeqBAIJ,
2619                                        MatNorm_SeqBAIJ,
2620                                /* 20*/ 0,
2621                                        MatAssemblyEnd_SeqBAIJ,
2622                                        MatSetOption_SeqBAIJ,
2623                                        MatZeroEntries_SeqBAIJ,
2624                                /* 24*/ MatZeroRows_SeqBAIJ,
2625                                        0,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                /* 29*/ MatSetUp_SeqBAIJ,
2630                                        0,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                /* 34*/ MatDuplicate_SeqBAIJ,
2635                                        0,
2636                                        0,
2637                                        MatILUFactor_SeqBAIJ,
2638                                        0,
2639                                /* 39*/ MatAXPY_SeqBAIJ,
2640                                        MatCreateSubMatrices_SeqBAIJ,
2641                                        MatIncreaseOverlap_SeqBAIJ,
2642                                        MatGetValues_SeqBAIJ,
2643                                        MatCopy_SeqBAIJ,
2644                                /* 44*/ 0,
2645                                        MatScale_SeqBAIJ,
2646                                        MatShift_SeqBAIJ,
2647                                        0,
2648                                        MatZeroRowsColumns_SeqBAIJ,
2649                                /* 49*/ 0,
2650                                        MatGetRowIJ_SeqBAIJ,
2651                                        MatRestoreRowIJ_SeqBAIJ,
2652                                        MatGetColumnIJ_SeqBAIJ,
2653                                        MatRestoreColumnIJ_SeqBAIJ,
2654                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2655                                        0,
2656                                        0,
2657                                        0,
2658                                        MatSetValuesBlocked_SeqBAIJ,
2659                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2660                                        MatDestroy_SeqBAIJ,
2661                                        MatView_SeqBAIJ,
2662                                        0,
2663                                        0,
2664                                /* 64*/ 0,
2665                                        0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2670                                        0,
2671                                        MatConvert_Basic,
2672                                        0,
2673                                        0,
2674                                /* 74*/ 0,
2675                                        MatFDColoringApply_BAIJ,
2676                                        0,
2677                                        0,
2678                                        0,
2679                                /* 79*/ 0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                        MatLoad_SeqBAIJ,
2684                                /* 84*/ 0,
2685                                        0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                /* 89*/ 0,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                /* 94*/ 0,
2695                                        0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                /* 99*/ 0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                /*104*/ 0,
2705                                        MatRealPart_SeqBAIJ,
2706                                        MatImaginaryPart_SeqBAIJ,
2707                                        0,
2708                                        0,
2709                                /*109*/ 0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                        MatMissingDiagonal_SeqBAIJ,
2714                                /*114*/ 0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                /*119*/ 0,
2720                                        0,
2721                                        MatMultHermitianTranspose_SeqBAIJ,
2722                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2723                                        0,
2724                                /*124*/ 0,
2725                                        0,
2726                                        MatInvertBlockDiagonal_SeqBAIJ,
2727                                        0,
2728                                        0,
2729                                /*129*/ 0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                /*134*/ 0,
2735                                        0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                /*139*/ MatSetBlockSizes_Default,
2740                                        0,
2741                                        0,
2742                                        MatFDColoringSetUp_SeqXAIJ,
2743                                        0,
2744                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2745                                        MatDestroySubMatrices_SeqBAIJ
2746 };
2747 
2748 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2749 {
2750   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2751   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2752   PetscErrorCode ierr;
2753 
2754   PetscFunctionBegin;
2755   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2756 
2757   /* allocate space for values if not already there */
2758   if (!aij->saved_values) {
2759     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2760     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2761   }
2762 
2763   /* copy values over */
2764   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2769 {
2770   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2771   PetscErrorCode ierr;
2772   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2773 
2774   PetscFunctionBegin;
2775   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2776   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2777 
2778   /* copy values over */
2779   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2785 
2786 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2787 {
2788   Mat_SeqBAIJ    *b;
2789   PetscErrorCode ierr;
2790   PetscInt       i,mbs,nbs,bs2;
2791   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2792 
2793   PetscFunctionBegin;
2794   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2795   if (nz == MAT_SKIP_ALLOCATION) {
2796     skipallocation = PETSC_TRUE;
2797     nz             = 0;
2798   }
2799 
2800   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2804 
2805   B->preallocated = PETSC_TRUE;
2806 
2807   mbs = B->rmap->n/bs;
2808   nbs = B->cmap->n/bs;
2809   bs2 = bs*bs;
2810 
2811   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2812 
2813   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2814   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2815   if (nnz) {
2816     for (i=0; i<mbs; i++) {
2817       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2818       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2819     }
2820   }
2821 
2822   b    = (Mat_SeqBAIJ*)B->data;
2823   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2824   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2825   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2826 
2827   if (!flg) {
2828     switch (bs) {
2829     case 1:
2830       B->ops->mult    = MatMult_SeqBAIJ_1;
2831       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2832       break;
2833     case 2:
2834       B->ops->mult    = MatMult_SeqBAIJ_2;
2835       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2836       break;
2837     case 3:
2838       B->ops->mult    = MatMult_SeqBAIJ_3;
2839       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2840       break;
2841     case 4:
2842       B->ops->mult    = MatMult_SeqBAIJ_4;
2843       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2844       break;
2845     case 5:
2846       B->ops->mult    = MatMult_SeqBAIJ_5;
2847       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2848       break;
2849     case 6:
2850       B->ops->mult    = MatMult_SeqBAIJ_6;
2851       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2852       break;
2853     case 7:
2854       B->ops->mult    = MatMult_SeqBAIJ_7;
2855       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2856       break;
2857     case 9:
2858 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2859       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2860       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2861 #else
2862       B->ops->mult    = MatMult_SeqBAIJ_N;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2864 #endif
2865       break;
2866     case 11:
2867       B->ops->mult    = MatMult_SeqBAIJ_11;
2868       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2869       break;
2870     case 15:
2871       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2872       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2873       break;
2874     default:
2875       B->ops->mult    = MatMult_SeqBAIJ_N;
2876       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2877       break;
2878     }
2879   }
2880   B->ops->sor = MatSOR_SeqBAIJ;
2881   b->mbs = mbs;
2882   b->nbs = nbs;
2883   if (!skipallocation) {
2884     if (!b->imax) {
2885       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2886       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2887 
2888       b->free_imax_ilen = PETSC_TRUE;
2889     }
2890     /* b->ilen will count nonzeros in each block row so far. */
2891     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2892     if (!nnz) {
2893       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2894       else if (nz < 0) nz = 1;
2895       for (i=0; i<mbs; i++) b->imax[i] = nz;
2896       nz = nz*mbs;
2897     } else {
2898       nz = 0;
2899       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2900     }
2901 
2902     /* allocate the matrix space */
2903     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2904     if (B->structure_only) {
2905       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2906       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2907       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2908     } else {
2909       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2910       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2911       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2912     }
2913     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2914 
2915     if (B->structure_only) {
2916       b->singlemalloc = PETSC_FALSE;
2917       b->free_a       = PETSC_FALSE;
2918     } else {
2919       b->singlemalloc = PETSC_TRUE;
2920       b->free_a       = PETSC_TRUE;
2921     }
2922     b->free_ij = PETSC_TRUE;
2923 
2924     b->i[0] = 0;
2925     for (i=1; i<mbs+1; i++) {
2926       b->i[i] = b->i[i-1] + b->imax[i-1];
2927     }
2928 
2929   } else {
2930     b->free_a  = PETSC_FALSE;
2931     b->free_ij = PETSC_FALSE;
2932   }
2933 
2934   b->bs2              = bs2;
2935   b->mbs              = mbs;
2936   b->nz               = 0;
2937   b->maxnz            = nz;
2938   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2939   B->was_assembled    = PETSC_FALSE;
2940   B->assembled        = PETSC_FALSE;
2941   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2942   PetscFunctionReturn(0);
2943 }
2944 
2945 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2946 {
2947   PetscInt       i,m,nz,nz_max=0,*nnz;
2948   PetscScalar    *values=0;
2949   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2950   PetscErrorCode ierr;
2951 
2952   PetscFunctionBegin;
2953   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2954   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2955   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2956   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2958   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2959   m    = B->rmap->n/bs;
2960 
2961   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2962   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2963   for (i=0; i<m; i++) {
2964     nz = ii[i+1]- ii[i];
2965     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2966     nz_max = PetscMax(nz_max, nz);
2967     nnz[i] = nz;
2968   }
2969   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2970   ierr = PetscFree(nnz);CHKERRQ(ierr);
2971 
2972   values = (PetscScalar*)V;
2973   if (!values) {
2974     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2975   }
2976   for (i=0; i<m; i++) {
2977     PetscInt          ncols  = ii[i+1] - ii[i];
2978     const PetscInt    *icols = jj + ii[i];
2979     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2980     if (!roworiented) {
2981       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2982     } else {
2983       PetscInt j;
2984       for (j=0; j<ncols; j++) {
2985         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2986         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2987       }
2988     }
2989   }
2990   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2991   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2992   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2993   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2994   PetscFunctionReturn(0);
2995 }
2996 
2997 /*MC
2998    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
2999    block sparse compressed row format.
3000 
3001    Options Database Keys:
3002 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3003 
3004   Level: beginner
3005 
3006 .seealso: MatCreateSeqBAIJ()
3007 M*/
3008 
3009 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3010 
3011 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3012 {
3013   PetscErrorCode ierr;
3014   PetscMPIInt    size;
3015   Mat_SeqBAIJ    *b;
3016 
3017   PetscFunctionBegin;
3018   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3019   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3020 
3021   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3022   B->data = (void*)b;
3023   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3024 
3025   b->row          = 0;
3026   b->col          = 0;
3027   b->icol         = 0;
3028   b->reallocs     = 0;
3029   b->saved_values = 0;
3030 
3031   b->roworiented        = PETSC_TRUE;
3032   b->nonew              = 0;
3033   b->diag               = 0;
3034   B->spptr              = 0;
3035   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3036   b->keepnonzeropattern = PETSC_FALSE;
3037 
3038   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3039   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3047 #if defined(PETSC_HAVE_HYPRE)
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_baij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3049 #endif
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3052   PetscFunctionReturn(0);
3053 }
3054 
3055 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3056 {
3057   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3058   PetscErrorCode ierr;
3059   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3060 
3061   PetscFunctionBegin;
3062   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3063 
3064   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3065     c->imax           = a->imax;
3066     c->ilen           = a->ilen;
3067     c->free_imax_ilen = PETSC_FALSE;
3068   } else {
3069     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3070     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3071     for (i=0; i<mbs; i++) {
3072       c->imax[i] = a->imax[i];
3073       c->ilen[i] = a->ilen[i];
3074     }
3075     c->free_imax_ilen = PETSC_TRUE;
3076   }
3077 
3078   /* allocate the matrix space */
3079   if (mallocmatspace) {
3080     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3081       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3082       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3083 
3084       c->i            = a->i;
3085       c->j            = a->j;
3086       c->singlemalloc = PETSC_FALSE;
3087       c->free_a       = PETSC_TRUE;
3088       c->free_ij      = PETSC_FALSE;
3089       c->parent       = A;
3090       C->preallocated = PETSC_TRUE;
3091       C->assembled    = PETSC_TRUE;
3092 
3093       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3094       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3095       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3096     } else {
3097       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3098       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3099 
3100       c->singlemalloc = PETSC_TRUE;
3101       c->free_a       = PETSC_TRUE;
3102       c->free_ij      = PETSC_TRUE;
3103 
3104       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3105       if (mbs > 0) {
3106         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3107         if (cpvalues == MAT_COPY_VALUES) {
3108           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3109         } else {
3110           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3111         }
3112       }
3113       C->preallocated = PETSC_TRUE;
3114       C->assembled    = PETSC_TRUE;
3115     }
3116   }
3117 
3118   c->roworiented = a->roworiented;
3119   c->nonew       = a->nonew;
3120 
3121   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3122   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3123 
3124   c->bs2         = a->bs2;
3125   c->mbs         = a->mbs;
3126   c->nbs         = a->nbs;
3127 
3128   if (a->diag) {
3129     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3130       c->diag      = a->diag;
3131       c->free_diag = PETSC_FALSE;
3132     } else {
3133       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3134       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3135       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3136       c->free_diag = PETSC_TRUE;
3137     }
3138   } else c->diag = 0;
3139 
3140   c->nz         = a->nz;
3141   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3142   c->solve_work = NULL;
3143   c->mult_work  = NULL;
3144   c->sor_workt  = NULL;
3145   c->sor_work   = NULL;
3146 
3147   c->compressedrow.use   = a->compressedrow.use;
3148   c->compressedrow.nrows = a->compressedrow.nrows;
3149   if (a->compressedrow.use) {
3150     i    = a->compressedrow.nrows;
3151     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3152     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3153     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3154     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3155   } else {
3156     c->compressedrow.use    = PETSC_FALSE;
3157     c->compressedrow.i      = NULL;
3158     c->compressedrow.rindex = NULL;
3159   }
3160   C->nonzerostate = A->nonzerostate;
3161 
3162   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3163   PetscFunctionReturn(0);
3164 }
3165 
3166 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3167 {
3168   PetscErrorCode ierr;
3169 
3170   PetscFunctionBegin;
3171   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3172   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3173   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3174   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3175   PetscFunctionReturn(0);
3176 }
3177 
3178 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3179 {
3180   Mat_SeqBAIJ    *a;
3181   PetscErrorCode ierr;
3182   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3183   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3184   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3185   PetscInt       *masked,nmask,tmp,bs2,ishift;
3186   PetscMPIInt    size;
3187   int            fd;
3188   PetscScalar    *aa;
3189   MPI_Comm       comm;
3190 
3191   PetscFunctionBegin;
3192   /* force binary viewer to load .info file if it has not yet done so */
3193   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3194   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3195   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3196   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3197   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3198   if (bs < 0) bs = 1;
3199   bs2  = bs*bs;
3200 
3201   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3202   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3203   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3204   ierr = PetscBinaryRead(fd,header,4,PETSC_INT);CHKERRQ(ierr);
3205   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3206   M = header[1]; N = header[2]; nz = header[3];
3207 
3208   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3209   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3210 
3211   /*
3212      This code adds extra rows to make sure the number of rows is
3213     divisible by the blocksize
3214   */
3215   mbs        = M/bs;
3216   extra_rows = bs - M + bs*(mbs);
3217   if (extra_rows == bs) extra_rows = 0;
3218   else mbs++;
3219   if (extra_rows) {
3220     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3221   }
3222 
3223   /* Set global sizes if not already set */
3224   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3225     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3226   } else { /* Check if the matrix global sizes are correct */
3227     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3228     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3229       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3230     }
3231     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3232   }
3233 
3234   /* read in row lengths */
3235   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3236   ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
3237   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3238 
3239   /* read in column indices */
3240   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3241   ierr = PetscBinaryRead(fd,jj,nz,PETSC_INT);CHKERRQ(ierr);
3242   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3243 
3244   /* loop over row lengths determining block row lengths */
3245   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3246   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3247   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3248   rowcount = 0;
3249   nzcount  = 0;
3250   for (i=0; i<mbs; i++) {
3251     nmask = 0;
3252     for (j=0; j<bs; j++) {
3253       kmax = rowlengths[rowcount];
3254       for (k=0; k<kmax; k++) {
3255         tmp = jj[nzcount++]/bs;
3256         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3257       }
3258       rowcount++;
3259     }
3260     browlengths[i] += nmask;
3261     /* zero out the mask elements we set */
3262     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3263   }
3264 
3265   /* Do preallocation  */
3266   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3267   a    = (Mat_SeqBAIJ*)newmat->data;
3268 
3269   /* set matrix "i" values */
3270   a->i[0] = 0;
3271   for (i=1; i<= mbs; i++) {
3272     a->i[i]      = a->i[i-1] + browlengths[i-1];
3273     a->ilen[i-1] = browlengths[i-1];
3274   }
3275   a->nz = 0;
3276   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3277 
3278   /* read in nonzero values */
3279   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3280   ierr = PetscBinaryRead(fd,aa,nz,PETSC_SCALAR);CHKERRQ(ierr);
3281   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3282 
3283   /* set "a" and "j" values into matrix */
3284   nzcount = 0; jcount = 0;
3285   for (i=0; i<mbs; i++) {
3286     nzcountb = nzcount;
3287     nmask    = 0;
3288     for (j=0; j<bs; j++) {
3289       kmax = rowlengths[i*bs+j];
3290       for (k=0; k<kmax; k++) {
3291         tmp = jj[nzcount++]/bs;
3292         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3293       }
3294     }
3295     /* sort the masked values */
3296     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3297 
3298     /* set "j" values into matrix */
3299     maskcount = 1;
3300     for (j=0; j<nmask; j++) {
3301       a->j[jcount++]  = masked[j];
3302       mask[masked[j]] = maskcount++;
3303     }
3304     /* set "a" values into matrix */
3305     ishift = bs2*a->i[i];
3306     for (j=0; j<bs; j++) {
3307       kmax = rowlengths[i*bs+j];
3308       for (k=0; k<kmax; k++) {
3309         tmp       = jj[nzcountb]/bs;
3310         block     = mask[tmp] - 1;
3311         point     = jj[nzcountb] - bs*tmp;
3312         idx       = ishift + bs2*block + j + bs*point;
3313         a->a[idx] = (MatScalar)aa[nzcountb++];
3314       }
3315     }
3316     /* zero out the mask elements we set */
3317     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3318   }
3319   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3320 
3321   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3322   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3323   ierr = PetscFree(aa);CHKERRQ(ierr);
3324   ierr = PetscFree(jj);CHKERRQ(ierr);
3325   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3326 
3327   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3328   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3329   PetscFunctionReturn(0);
3330 }
3331 
3332 /*@C
3333    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3334    compressed row) format.  For good matrix assembly performance the
3335    user should preallocate the matrix storage by setting the parameter nz
3336    (or the array nnz).  By setting these parameters accurately, performance
3337    during matrix assembly can be increased by more than a factor of 50.
3338 
3339    Collective on MPI_Comm
3340 
3341    Input Parameters:
3342 +  comm - MPI communicator, set to PETSC_COMM_SELF
3343 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3344           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3345 .  m - number of rows
3346 .  n - number of columns
3347 .  nz - number of nonzero blocks  per block row (same for all rows)
3348 -  nnz - array containing the number of nonzero blocks in the various block rows
3349          (possibly different for each block row) or NULL
3350 
3351    Output Parameter:
3352 .  A - the matrix
3353 
3354    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3355    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3356    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3357 
3358    Options Database Keys:
3359 .   -mat_no_unroll - uses code that does not unroll the loops in the
3360                      block calculations (much slower)
3361 .    -mat_block_size - size of the blocks to use
3362 
3363    Level: intermediate
3364 
3365    Notes:
3366    The number of rows and columns must be divisible by blocksize.
3367 
3368    If the nnz parameter is given then the nz parameter is ignored
3369 
3370    A nonzero block is any block that as 1 or more nonzeros in it
3371 
3372    The block AIJ format is fully compatible with standard Fortran 77
3373    storage.  That is, the stored row and column indices can begin at
3374    either one (as in Fortran) or zero.  See the users' manual for details.
3375 
3376    Specify the preallocated storage with either nz or nnz (not both).
3377    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3378    allocation.  See Users-Manual: ch_mat for details.
3379    matrices.
3380 
3381 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3382 @*/
3383 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3384 {
3385   PetscErrorCode ierr;
3386 
3387   PetscFunctionBegin;
3388   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3389   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3390   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3391   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3392   PetscFunctionReturn(0);
3393 }
3394 
3395 /*@C
3396    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3397    per row in the matrix. For good matrix assembly performance the
3398    user should preallocate the matrix storage by setting the parameter nz
3399    (or the array nnz).  By setting these parameters accurately, performance
3400    during matrix assembly can be increased by more than a factor of 50.
3401 
3402    Collective on MPI_Comm
3403 
3404    Input Parameters:
3405 +  B - the matrix
3406 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3407           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3408 .  nz - number of block nonzeros per block row (same for all rows)
3409 -  nnz - array containing the number of block nonzeros in the various block rows
3410          (possibly different for each block row) or NULL
3411 
3412    Options Database Keys:
3413 .   -mat_no_unroll - uses code that does not unroll the loops in the
3414                      block calculations (much slower)
3415 .   -mat_block_size - size of the blocks to use
3416 
3417    Level: intermediate
3418 
3419    Notes:
3420    If the nnz parameter is given then the nz parameter is ignored
3421 
3422    You can call MatGetInfo() to get information on how effective the preallocation was;
3423    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3424    You can also run with the option -info and look for messages with the string
3425    malloc in them to see if additional memory allocation was needed.
3426 
3427    The block AIJ format is fully compatible with standard Fortran 77
3428    storage.  That is, the stored row and column indices can begin at
3429    either one (as in Fortran) or zero.  See the users' manual for details.
3430 
3431    Specify the preallocated storage with either nz or nnz (not both).
3432    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3433    allocation.  See Users-Manual: ch_mat for details.
3434 
3435 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3436 @*/
3437 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3438 {
3439   PetscErrorCode ierr;
3440 
3441   PetscFunctionBegin;
3442   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3443   PetscValidType(B,1);
3444   PetscValidLogicalCollectiveInt(B,bs,2);
3445   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3446   PetscFunctionReturn(0);
3447 }
3448 
3449 /*@C
3450    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3451    (the default sequential PETSc format).
3452 
3453    Collective on MPI_Comm
3454 
3455    Input Parameters:
3456 +  B - the matrix
3457 .  i - the indices into j for the start of each local row (starts with zero)
3458 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3459 -  v - optional values in the matrix
3460 
3461    Level: developer
3462 
3463    Notes:
3464    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3465    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3466    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3467    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3468    block column and the second index is over columns within a block.
3469 
3470 .keywords: matrix, aij, compressed row, sparse
3471 
3472 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3473 @*/
3474 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3475 {
3476   PetscErrorCode ierr;
3477 
3478   PetscFunctionBegin;
3479   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3480   PetscValidType(B,1);
3481   PetscValidLogicalCollectiveInt(B,bs,2);
3482   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3483   PetscFunctionReturn(0);
3484 }
3485 
3486 
3487 /*@
3488      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3489 
3490      Collective on MPI_Comm
3491 
3492    Input Parameters:
3493 +  comm - must be an MPI communicator of size 1
3494 .  bs - size of block
3495 .  m - number of rows
3496 .  n - number of columns
3497 .  i - row indices
3498 .  j - column indices
3499 -  a - matrix values
3500 
3501    Output Parameter:
3502 .  mat - the matrix
3503 
3504    Level: advanced
3505 
3506    Notes:
3507        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3508     once the matrix is destroyed
3509 
3510        You cannot set new nonzero locations into this matrix, that will generate an error.
3511 
3512        The i and j indices are 0 based
3513 
3514        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3515 
3516       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3517       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3518       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3519       with column-major ordering within blocks.
3520 
3521 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3522 
3523 @*/
3524 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3525 {
3526   PetscErrorCode ierr;
3527   PetscInt       ii;
3528   Mat_SeqBAIJ    *baij;
3529 
3530   PetscFunctionBegin;
3531   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3532   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3533 
3534   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3535   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3536   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3537   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3538   baij = (Mat_SeqBAIJ*)(*mat)->data;
3539   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3540   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3541 
3542   baij->i = i;
3543   baij->j = j;
3544   baij->a = a;
3545 
3546   baij->singlemalloc = PETSC_FALSE;
3547   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3548   baij->free_a       = PETSC_FALSE;
3549   baij->free_ij      = PETSC_FALSE;
3550 
3551   for (ii=0; ii<m; ii++) {
3552     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3553 #if defined(PETSC_USE_DEBUG)
3554     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3555 #endif
3556   }
3557 #if defined(PETSC_USE_DEBUG)
3558   for (ii=0; ii<baij->i[m]; ii++) {
3559     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3560     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3561   }
3562 #endif
3563 
3564   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3565   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3566   PetscFunctionReturn(0);
3567 }
3568 
3569 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3570 {
3571   PetscErrorCode ierr;
3572   PetscMPIInt    size;
3573 
3574   PetscFunctionBegin;
3575   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3576   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3577     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3578   } else {
3579     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3580   }
3581   PetscFunctionReturn(0);
3582 }
3583