xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 58c0e5077dcf40d6a880c19c87f1075ae1d22c8e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912   PetscErrorCode    ierr;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953       }
954       if (N >= i) {
955         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   for (k=0; k<m; k++) { /* loop over added rows */
991     row  = im[k]; brow = row/4;
992     rp   = aj + ai[brow];
993     ap   = aa + 16*ai[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     high = nrow;
997     for (l=0; l<n; l++) { /* loop over added columns */
998       col   = in[l]; bcol = col/4;
999       ridx  = row % 4; cidx = col % 4;
1000       value = v[l + k*n];
1001       if (col <= lastcol)  low = 0;
1002       else                high = nrow;
1003       lastcol = col;
1004       while (high-low > 7) {
1005         t = (low+high)/2;
1006         if (rp[t] > bcol) high = t;
1007         else              low  = t;
1008       }
1009       for (i=low; i<high; i++) {
1010         if (rp[i] > bcol) break;
1011         if (rp[i] == bcol) {
1012           bap   = ap +  16*i + 4*cidx + ridx;
1013           *bap += value;
1014           goto noinsert1;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1049   } else {
1050     PetscInt n;
1051     n = PetscMin(a->mbs, a->nbs);
1052     diag = a->diag;
1053     for (i=0; i<n; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_HYPRE)
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1226 #endif
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = flg;
1241     break;
1242   case MAT_KEEP_NONZERO_PATTERN:
1243     a->keepnonzeropattern = flg;
1244     break;
1245   case MAT_NEW_NONZERO_LOCATIONS:
1246     a->nonew = (flg ? 0 : 1);
1247     break;
1248   case MAT_NEW_NONZERO_LOCATION_ERR:
1249     a->nonew = (flg ? -1 : 0);
1250     break;
1251   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252     a->nonew = (flg ? -2 : 0);
1253     break;
1254   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255     a->nounused = (flg ? -1 : 0);
1256     break;
1257   case MAT_NEW_DIAGONALS:
1258   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259   case MAT_USE_HASH_TABLE:
1260   case MAT_SORTED_FULL:
1261     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1262     break;
1263   case MAT_SPD:
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268   case MAT_SUBMAT_SINGLEIS:
1269   case MAT_STRUCTURE_ONLY:
1270     /* These options are handled directly by MatSetOption() */
1271     break;
1272   default:
1273     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1279 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1280 {
1281   PetscErrorCode ierr;
1282   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1283   MatScalar      *aa_i;
1284   PetscScalar    *v_i;
1285 
1286   PetscFunctionBegin;
1287   bs  = A->rmap->bs;
1288   bs2 = bs*bs;
1289   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1290 
1291   bn  = row/bs;   /* Block number */
1292   bp  = row % bs; /* Block Position */
1293   M   = ai[bn+1] - ai[bn];
1294   *nz = bs*M;
1295 
1296   if (v) {
1297     *v = 0;
1298     if (*nz) {
1299       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1300       for (i=0; i<M; i++) { /* for each block in the block row */
1301         v_i  = *v + i*bs;
1302         aa_i = aa + bs2*(ai[bn] + i);
1303         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1304       }
1305     }
1306   }
1307 
1308   if (idx) {
1309     *idx = 0;
1310     if (*nz) {
1311       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1312       for (i=0; i<M; i++) { /* for each block in the block row */
1313         idx_i = *idx + i*bs;
1314         itmp  = bs*aj[ai[bn] + i];
1315         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1316       }
1317     }
1318   }
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1323 {
1324   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1333 {
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1338   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1343 {
1344   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1345   Mat            C;
1346   PetscErrorCode ierr;
1347   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1348   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1349   MatScalar      *ata,*aa=a->a;
1350 
1351   PetscFunctionBegin;
1352   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1353   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1354     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1355 
1356     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1357     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1358     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1359     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1360 
1361     at  = (Mat_SeqBAIJ*)C->data;
1362     ati = at->i;
1363     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1364   } else {
1365     C = *B;
1366     at = (Mat_SeqBAIJ*)C->data;
1367     ati = at->i;
1368   }
1369 
1370   atj = at->j;
1371   ata = at->a;
1372 
1373   /* Copy ati into atfill so we have locations of the next free space in atj */
1374   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1375 
1376   /* Walk through A row-wise and mark nonzero entries of A^T. */
1377   for (i=0; i<mbs; i++) {
1378     anzj = ai[i+1] - ai[i];
1379     for (j=0; j<anzj; j++) {
1380       atj[atfill[*aj]] = i;
1381       for (kr=0; kr<bs; kr++) {
1382         for (k=0; k<bs; k++) {
1383           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1384         }
1385       }
1386       atfill[*aj++] += 1;
1387     }
1388   }
1389   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1391 
1392   /* Clean up temporary space and complete requests. */
1393   ierr = PetscFree(atfill);CHKERRQ(ierr);
1394 
1395   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1396     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1397     *B = C;
1398   } else {
1399     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1405 {
1406   PetscErrorCode ierr;
1407   Mat            Btrans;
1408 
1409   PetscFunctionBegin;
1410   *f   = PETSC_FALSE;
1411   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1412   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1413   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1414   PetscFunctionReturn(0);
1415 }
1416 
1417 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1418 {
1419   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1420   PetscErrorCode ierr;
1421   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1422   int            fd;
1423   PetscScalar    *aa;
1424   FILE           *file;
1425 
1426   PetscFunctionBegin;
1427   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1428   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1429   col_lens[0] = MAT_FILE_CLASSID;
1430 
1431   col_lens[1] = A->rmap->N;
1432   col_lens[2] = A->cmap->n;
1433   col_lens[3] = a->nz*bs2;
1434 
1435   /* store lengths of each row and write (including header) to file */
1436   count = 0;
1437   for (i=0; i<a->mbs; i++) {
1438     for (j=0; j<bs; j++) {
1439       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1440     }
1441   }
1442   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1443   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1444 
1445   /* store column indices (zero start index) */
1446   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1447   count = 0;
1448   for (i=0; i<a->mbs; i++) {
1449     for (j=0; j<bs; j++) {
1450       for (k=a->i[i]; k<a->i[i+1]; k++) {
1451         for (l=0; l<bs; l++) {
1452           jj[count++] = bs*a->j[k] + l;
1453         }
1454       }
1455     }
1456   }
1457   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1458   ierr = PetscFree(jj);CHKERRQ(ierr);
1459 
1460   /* store nonzero values */
1461   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1462   count = 0;
1463   for (i=0; i<a->mbs; i++) {
1464     for (j=0; j<bs; j++) {
1465       for (k=a->i[i]; k<a->i[i+1]; k++) {
1466         for (l=0; l<bs; l++) {
1467           aa[count++] = a->a[bs2*k + l*bs + j];
1468         }
1469       }
1470     }
1471   }
1472   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1473   ierr = PetscFree(aa);CHKERRQ(ierr);
1474 
1475   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1476   if (file) {
1477     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1478   }
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1483 {
1484   PetscErrorCode ierr;
1485   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1486   PetscInt       i,bs = A->rmap->bs,k;
1487 
1488   PetscFunctionBegin;
1489   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1490   for (i=0; i<a->mbs; i++) {
1491     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1492     for (k=a->i[i]; k<a->i[i+1]; k++) {
1493       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1494     }
1495     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1496   }
1497   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1498   PetscFunctionReturn(0);
1499 }
1500 
1501 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1502 {
1503   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1504   PetscErrorCode    ierr;
1505   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1506   PetscViewerFormat format;
1507 
1508   PetscFunctionBegin;
1509   if (A->structure_only) {
1510     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1511     PetscFunctionReturn(0);
1512   }
1513 
1514   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1515   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1516     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1518     const char *matname;
1519     Mat        aij;
1520     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1521     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1522     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1523     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1524     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1525   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1526       PetscFunctionReturn(0);
1527   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1528     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1529     for (i=0; i<a->mbs; i++) {
1530       for (j=0; j<bs; j++) {
1531         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1532         for (k=a->i[i]; k<a->i[i+1]; k++) {
1533           for (l=0; l<bs; l++) {
1534 #if defined(PETSC_USE_COMPLEX)
1535             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1536               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1537                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1538             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1539               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1540                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1542               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1543             }
1544 #else
1545             if (a->a[bs2*k + l*bs + j] != 0.0) {
1546               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1547             }
1548 #endif
1549           }
1550         }
1551         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1552       }
1553     }
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1555   } else {
1556     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1557     for (i=0; i<a->mbs; i++) {
1558       for (j=0; j<bs; j++) {
1559         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1560         for (k=a->i[i]; k<a->i[i+1]; k++) {
1561           for (l=0; l<bs; l++) {
1562 #if defined(PETSC_USE_COMPLEX)
1563             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1564               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1565                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1566             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1567               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1568                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             } else {
1570               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1571             }
1572 #else
1573             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1574 #endif
1575           }
1576         }
1577         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1578       }
1579     }
1580     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1581   }
1582   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1583   PetscFunctionReturn(0);
1584 }
1585 
1586 #include <petscdraw.h>
1587 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1588 {
1589   Mat               A = (Mat) Aa;
1590   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1591   PetscErrorCode    ierr;
1592   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1593   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1594   MatScalar         *aa;
1595   PetscViewer       viewer;
1596   PetscViewerFormat format;
1597 
1598   PetscFunctionBegin;
1599   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1600   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1601   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1602 
1603   /* loop over matrix elements drawing boxes */
1604 
1605   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1606     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1607     /* Blue for negative, Cyan for zero and  Red for positive */
1608     color = PETSC_DRAW_BLUE;
1609     for (i=0,row=0; i<mbs; i++,row+=bs) {
1610       for (j=a->i[i]; j<a->i[i+1]; j++) {
1611         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1612         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1613         aa  = a->a + j*bs2;
1614         for (k=0; k<bs; k++) {
1615           for (l=0; l<bs; l++) {
1616             if (PetscRealPart(*aa++) >=  0.) continue;
1617             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1618           }
1619         }
1620       }
1621     }
1622     color = PETSC_DRAW_CYAN;
1623     for (i=0,row=0; i<mbs; i++,row+=bs) {
1624       for (j=a->i[i]; j<a->i[i+1]; j++) {
1625         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1626         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1627         aa  = a->a + j*bs2;
1628         for (k=0; k<bs; k++) {
1629           for (l=0; l<bs; l++) {
1630             if (PetscRealPart(*aa++) != 0.) continue;
1631             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1632           }
1633         }
1634       }
1635     }
1636     color = PETSC_DRAW_RED;
1637     for (i=0,row=0; i<mbs; i++,row+=bs) {
1638       for (j=a->i[i]; j<a->i[i+1]; j++) {
1639         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1640         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1641         aa  = a->a + j*bs2;
1642         for (k=0; k<bs; k++) {
1643           for (l=0; l<bs; l++) {
1644             if (PetscRealPart(*aa++) <= 0.) continue;
1645             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1646           }
1647         }
1648       }
1649     }
1650     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1651   } else {
1652     /* use contour shading to indicate magnitude of values */
1653     /* first determine max of all nonzero values */
1654     PetscReal minv = 0.0, maxv = 0.0;
1655     PetscDraw popup;
1656 
1657     for (i=0; i<a->nz*a->bs2; i++) {
1658       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1659     }
1660     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1661     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1662     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1663 
1664     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1665     for (i=0,row=0; i<mbs; i++,row+=bs) {
1666       for (j=a->i[i]; j<a->i[i+1]; j++) {
1667         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1668         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1669         aa  = a->a + j*bs2;
1670         for (k=0; k<bs; k++) {
1671           for (l=0; l<bs; l++) {
1672             MatScalar v = *aa++;
1673             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1674             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1675           }
1676         }
1677       }
1678     }
1679     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1680   }
1681   PetscFunctionReturn(0);
1682 }
1683 
1684 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1685 {
1686   PetscErrorCode ierr;
1687   PetscReal      xl,yl,xr,yr,w,h;
1688   PetscDraw      draw;
1689   PetscBool      isnull;
1690 
1691   PetscFunctionBegin;
1692   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1693   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1694   if (isnull) PetscFunctionReturn(0);
1695 
1696   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1697   xr  += w;          yr += h;        xl = -w;     yl = -h;
1698   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1700   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1701   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1702   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 
1706 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1707 {
1708   PetscErrorCode ierr;
1709   PetscBool      iascii,isbinary,isdraw;
1710 
1711   PetscFunctionBegin;
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1714   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1715   if (iascii) {
1716     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1717   } else if (isbinary) {
1718     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1719   } else if (isdraw) {
1720     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1721   } else {
1722     Mat B;
1723     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1724     ierr = MatView(B,viewer);CHKERRQ(ierr);
1725     ierr = MatDestroy(&B);CHKERRQ(ierr);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 
1731 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1732 {
1733   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1734   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1735   PetscInt    *ai = a->i,*ailen = a->ilen;
1736   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1737   MatScalar   *ap,*aa = a->a;
1738 
1739   PetscFunctionBegin;
1740   for (k=0; k<m; k++) { /* loop over rows */
1741     row = im[k]; brow = row/bs;
1742     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1743     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1744     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1745     nrow = ailen[brow];
1746     for (l=0; l<n; l++) { /* loop over columns */
1747       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1748       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1749       col  = in[l];
1750       bcol = col/bs;
1751       cidx = col%bs;
1752       ridx = row%bs;
1753       high = nrow;
1754       low  = 0; /* assume unsorted */
1755       while (high-low > 5) {
1756         t = (low+high)/2;
1757         if (rp[t] > bcol) high = t;
1758         else             low  = t;
1759       }
1760       for (i=low; i<high; i++) {
1761         if (rp[i] > bcol) break;
1762         if (rp[i] == bcol) {
1763           *v++ = ap[bs2*i+bs*cidx+ridx];
1764           goto finished;
1765         }
1766       }
1767       *v++ = 0.0;
1768 finished:;
1769     }
1770   }
1771   PetscFunctionReturn(0);
1772 }
1773 
1774 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1775 {
1776   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1777   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1778   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1779   PetscErrorCode    ierr;
1780   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1781   PetscBool         roworiented=a->roworiented;
1782   const PetscScalar *value     = v;
1783   MatScalar         *ap=NULL,*aa = a->a,*bap;
1784 
1785   PetscFunctionBegin;
1786   if (roworiented) {
1787     stepval = (n-1)*bs;
1788   } else {
1789     stepval = (m-1)*bs;
1790   }
1791   for (k=0; k<m; k++) { /* loop over added rows */
1792     row = im[k];
1793     if (row < 0) continue;
1794 #if defined(PETSC_USE_DEBUG)
1795     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1796 #endif
1797     rp   = aj + ai[row];
1798     if (!A->structure_only) ap = aa + bs2*ai[row];
1799     rmax = imax[row];
1800     nrow = ailen[row];
1801     low  = 0;
1802     high = nrow;
1803     for (l=0; l<n; l++) { /* loop over added columns */
1804       if (in[l] < 0) continue;
1805 #if defined(PETSC_USE_DEBUG)
1806       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1807 #endif
1808       col = in[l];
1809       if (!A->structure_only) {
1810         if (roworiented) {
1811           value = v + (k*(stepval+bs) + l)*bs;
1812         } else {
1813           value = v + (l*(stepval+bs) + k)*bs;
1814         }
1815       }
1816       if (col <= lastcol) low = 0;
1817       else high = nrow;
1818       lastcol = col;
1819       while (high-low > 7) {
1820         t = (low+high)/2;
1821         if (rp[t] > col) high = t;
1822         else             low  = t;
1823       }
1824       for (i=low; i<high; i++) {
1825         if (rp[i] > col) break;
1826         if (rp[i] == col) {
1827           if (A->structure_only) goto noinsert2;
1828           bap = ap +  bs2*i;
1829           if (roworiented) {
1830             if (is == ADD_VALUES) {
1831               for (ii=0; ii<bs; ii++,value+=stepval) {
1832                 for (jj=ii; jj<bs2; jj+=bs) {
1833                   bap[jj] += *value++;
1834                 }
1835               }
1836             } else {
1837               for (ii=0; ii<bs; ii++,value+=stepval) {
1838                 for (jj=ii; jj<bs2; jj+=bs) {
1839                   bap[jj] = *value++;
1840                 }
1841               }
1842             }
1843           } else {
1844             if (is == ADD_VALUES) {
1845               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1846                 for (jj=0; jj<bs; jj++) {
1847                   bap[jj] += value[jj];
1848                 }
1849                 bap += bs;
1850               }
1851             } else {
1852               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1853                 for (jj=0; jj<bs; jj++) {
1854                   bap[jj]  = value[jj];
1855                 }
1856                 bap += bs;
1857               }
1858             }
1859           }
1860           goto noinsert2;
1861         }
1862       }
1863       if (nonew == 1) goto noinsert2;
1864       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1865       if (A->structure_only) {
1866         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1867       } else {
1868         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1869       }
1870       N = nrow++ - 1; high++;
1871       /* shift up all the later entries in this row */
1872       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1873       rp[i] = col;
1874       if (!A->structure_only) {
1875         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1876         bap   = ap +  bs2*i;
1877         if (roworiented) {
1878           for (ii=0; ii<bs; ii++,value+=stepval) {
1879             for (jj=ii; jj<bs2; jj+=bs) {
1880               bap[jj] = *value++;
1881             }
1882           }
1883         } else {
1884           for (ii=0; ii<bs; ii++,value+=stepval) {
1885             for (jj=0; jj<bs; jj++) {
1886               *bap++ = *value++;
1887             }
1888           }
1889         }
1890       }
1891 noinsert2:;
1892       low = i;
1893     }
1894     ailen[row] = nrow;
1895   }
1896   PetscFunctionReturn(0);
1897 }
1898 
1899 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1900 {
1901   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1902   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1903   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1904   PetscErrorCode ierr;
1905   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1906   MatScalar      *aa  = a->a,*ap;
1907   PetscReal      ratio=0.6;
1908 
1909   PetscFunctionBegin;
1910   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1911 
1912   if (m) rmax = ailen[0];
1913   for (i=1; i<mbs; i++) {
1914     /* move each row back by the amount of empty slots (fshift) before it*/
1915     fshift += imax[i-1] - ailen[i-1];
1916     rmax    = PetscMax(rmax,ailen[i]);
1917     if (fshift) {
1918       ip = aj + ai[i];
1919       ap = aa + bs2*ai[i];
1920       N  = ailen[i];
1921       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1922       if (!A->structure_only) {
1923         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1924       }
1925     }
1926     ai[i] = ai[i-1] + ailen[i-1];
1927   }
1928   if (mbs) {
1929     fshift += imax[mbs-1] - ailen[mbs-1];
1930     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1931   }
1932 
1933   /* reset ilen and imax for each row */
1934   a->nonzerorowcnt = 0;
1935   if (A->structure_only) {
1936     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1937   } else { /* !A->structure_only */
1938     for (i=0; i<mbs; i++) {
1939       ailen[i] = imax[i] = ai[i+1] - ai[i];
1940       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1941     }
1942   }
1943   a->nz = ai[mbs];
1944 
1945   /* diagonals may have moved, so kill the diagonal pointers */
1946   a->idiagvalid = PETSC_FALSE;
1947   if (fshift && a->diag) {
1948     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1949     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1950     a->diag = 0;
1951   }
1952   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1953   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1954   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1955   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1956 
1957   A->info.mallocs    += a->reallocs;
1958   a->reallocs         = 0;
1959   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1960   a->rmax             = rmax;
1961 
1962   if (!A->structure_only) {
1963     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1964   }
1965   PetscFunctionReturn(0);
1966 }
1967 
1968 /*
1969    This function returns an array of flags which indicate the locations of contiguous
1970    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1971    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1972    Assume: sizes should be long enough to hold all the values.
1973 */
1974 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1975 {
1976   PetscInt  i,j,k,row;
1977   PetscBool flg;
1978 
1979   PetscFunctionBegin;
1980   for (i=0,j=0; i<n; j++) {
1981     row = idx[i];
1982     if (row%bs!=0) { /* Not the begining of a block */
1983       sizes[j] = 1;
1984       i++;
1985     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1986       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1987       i++;
1988     } else { /* Begining of the block, so check if the complete block exists */
1989       flg = PETSC_TRUE;
1990       for (k=1; k<bs; k++) {
1991         if (row+k != idx[i+k]) { /* break in the block */
1992           flg = PETSC_FALSE;
1993           break;
1994         }
1995       }
1996       if (flg) { /* No break in the bs */
1997         sizes[j] = bs;
1998         i       += bs;
1999       } else {
2000         sizes[j] = 1;
2001         i++;
2002       }
2003     }
2004   }
2005   *bs_max = j;
2006   PetscFunctionReturn(0);
2007 }
2008 
2009 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2010 {
2011   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2012   PetscErrorCode    ierr;
2013   PetscInt          i,j,k,count,*rows;
2014   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2015   PetscScalar       zero = 0.0;
2016   MatScalar         *aa;
2017   const PetscScalar *xx;
2018   PetscScalar       *bb;
2019 
2020   PetscFunctionBegin;
2021   /* fix right hand side if needed */
2022   if (x && b) {
2023     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2024     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2025     for (i=0; i<is_n; i++) {
2026       bb[is_idx[i]] = diag*xx[is_idx[i]];
2027     }
2028     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2029     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2030   }
2031 
2032   /* Make a copy of the IS and  sort it */
2033   /* allocate memory for rows,sizes */
2034   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2035 
2036   /* copy IS values to rows, and sort them */
2037   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2038   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2039 
2040   if (baij->keepnonzeropattern) {
2041     for (i=0; i<is_n; i++) sizes[i] = 1;
2042     bs_max          = is_n;
2043   } else {
2044     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2045     A->nonzerostate++;
2046   }
2047 
2048   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2049     row = rows[j];
2050     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2051     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2052     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2053     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2054       if (diag != (PetscScalar)0.0) {
2055         if (baij->ilen[row/bs] > 0) {
2056           baij->ilen[row/bs]       = 1;
2057           baij->j[baij->i[row/bs]] = row/bs;
2058 
2059           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2060         }
2061         /* Now insert all the diagonal values for this bs */
2062         for (k=0; k<bs; k++) {
2063           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2064         }
2065       } else { /* (diag == 0.0) */
2066         baij->ilen[row/bs] = 0;
2067       } /* end (diag == 0.0) */
2068     } else { /* (sizes[i] != bs) */
2069 #if defined(PETSC_USE_DEBUG)
2070       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2071 #endif
2072       for (k=0; k<count; k++) {
2073         aa[0] =  zero;
2074         aa   += bs;
2075       }
2076       if (diag != (PetscScalar)0.0) {
2077         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2078       }
2079     }
2080   }
2081 
2082   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2083   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2084   PetscFunctionReturn(0);
2085 }
2086 
2087 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2088 {
2089   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2090   PetscErrorCode    ierr;
2091   PetscInt          i,j,k,count;
2092   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2093   PetscScalar       zero = 0.0;
2094   MatScalar         *aa;
2095   const PetscScalar *xx;
2096   PetscScalar       *bb;
2097   PetscBool         *zeroed,vecs = PETSC_FALSE;
2098 
2099   PetscFunctionBegin;
2100   /* fix right hand side if needed */
2101   if (x && b) {
2102     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2103     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2104     vecs = PETSC_TRUE;
2105   }
2106 
2107   /* zero the columns */
2108   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2109   for (i=0; i<is_n; i++) {
2110     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2111     zeroed[is_idx[i]] = PETSC_TRUE;
2112   }
2113   for (i=0; i<A->rmap->N; i++) {
2114     if (!zeroed[i]) {
2115       row = i/bs;
2116       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2117         for (k=0; k<bs; k++) {
2118           col = bs*baij->j[j] + k;
2119           if (zeroed[col]) {
2120             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2121             if (vecs) bb[i] -= aa[0]*xx[col];
2122             aa[0] = 0.0;
2123           }
2124         }
2125       }
2126     } else if (vecs) bb[i] = diag*xx[i];
2127   }
2128   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2129   if (vecs) {
2130     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2131     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2132   }
2133 
2134   /* zero the rows */
2135   for (i=0; i<is_n; i++) {
2136     row   = is_idx[i];
2137     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2138     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2139     for (k=0; k<count; k++) {
2140       aa[0] =  zero;
2141       aa   += bs;
2142     }
2143     if (diag != (PetscScalar)0.0) {
2144       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2145     }
2146   }
2147   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2148   PetscFunctionReturn(0);
2149 }
2150 
2151 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2152 {
2153   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2154   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2155   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2156   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2157   PetscErrorCode ierr;
2158   PetscInt       ridx,cidx,bs2=a->bs2;
2159   PetscBool      roworiented=a->roworiented;
2160   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2161 
2162   PetscFunctionBegin;
2163   for (k=0; k<m; k++) { /* loop over added rows */
2164     row  = im[k];
2165     brow = row/bs;
2166     if (row < 0) continue;
2167 #if defined(PETSC_USE_DEBUG)
2168     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2169 #endif
2170     rp   = aj + ai[brow];
2171     if (!A->structure_only) ap = aa + bs2*ai[brow];
2172     rmax = imax[brow];
2173     nrow = ailen[brow];
2174     low  = 0;
2175     high = nrow;
2176     for (l=0; l<n; l++) { /* loop over added columns */
2177       if (in[l] < 0) continue;
2178 #if defined(PETSC_USE_DEBUG)
2179       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2180 #endif
2181       col  = in[l]; bcol = col/bs;
2182       ridx = row % bs; cidx = col % bs;
2183       if (!A->structure_only) {
2184         if (roworiented) {
2185           value = v[l + k*n];
2186         } else {
2187           value = v[k + l*m];
2188         }
2189       }
2190       if (col <= lastcol) low = 0; else high = nrow;
2191       lastcol = col;
2192       while (high-low > 7) {
2193         t = (low+high)/2;
2194         if (rp[t] > bcol) high = t;
2195         else              low  = t;
2196       }
2197       for (i=low; i<high; i++) {
2198         if (rp[i] > bcol) break;
2199         if (rp[i] == bcol) {
2200           bap = ap +  bs2*i + bs*cidx + ridx;
2201           if (!A->structure_only) {
2202             if (is == ADD_VALUES) *bap += value;
2203             else                  *bap  = value;
2204           }
2205           goto noinsert1;
2206         }
2207       }
2208       if (nonew == 1) goto noinsert1;
2209       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2210       if (A->structure_only) {
2211         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2212       } else {
2213         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2214       }
2215       N = nrow++ - 1; high++;
2216       /* shift up all the later entries in this row */
2217       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2218       rp[i] = bcol;
2219       if (!A->structure_only) {
2220         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2221         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2222         ap[bs2*i + bs*cidx + ridx] = value;
2223       }
2224       a->nz++;
2225       A->nonzerostate++;
2226 noinsert1:;
2227       low = i;
2228     }
2229     ailen[brow] = nrow;
2230   }
2231   PetscFunctionReturn(0);
2232 }
2233 
2234 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2235 {
2236   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2237   Mat            outA;
2238   PetscErrorCode ierr;
2239   PetscBool      row_identity,col_identity;
2240 
2241   PetscFunctionBegin;
2242   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2243   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2244   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2245   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2246 
2247   outA            = inA;
2248   inA->factortype = MAT_FACTOR_LU;
2249   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2250   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2251 
2252   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2253 
2254   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2255   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2256   a->row = row;
2257   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2258   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2259   a->col = col;
2260 
2261   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2262   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2263   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2264   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2265 
2266   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2267   if (!a->solve_work) {
2268     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2269     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2270   }
2271   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2276 {
2277   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2278   PetscInt    i,nz,mbs;
2279 
2280   PetscFunctionBegin;
2281   nz  = baij->maxnz;
2282   mbs = baij->mbs;
2283   for (i=0; i<nz; i++) {
2284     baij->j[i] = indices[i];
2285   }
2286   baij->nz = nz;
2287   for (i=0; i<mbs; i++) {
2288     baij->ilen[i] = baij->imax[i];
2289   }
2290   PetscFunctionReturn(0);
2291 }
2292 
2293 /*@
2294     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2295        in the matrix.
2296 
2297   Input Parameters:
2298 +  mat - the SeqBAIJ matrix
2299 -  indices - the column indices
2300 
2301   Level: advanced
2302 
2303   Notes:
2304     This can be called if you have precomputed the nonzero structure of the
2305   matrix and want to provide it to the matrix object to improve the performance
2306   of the MatSetValues() operation.
2307 
2308     You MUST have set the correct numbers of nonzeros per row in the call to
2309   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2310 
2311     MUST be called before any calls to MatSetValues();
2312 
2313 @*/
2314 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2315 {
2316   PetscErrorCode ierr;
2317 
2318   PetscFunctionBegin;
2319   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2320   PetscValidPointer(indices,2);
2321   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2322   PetscFunctionReturn(0);
2323 }
2324 
2325 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2326 {
2327   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2328   PetscErrorCode ierr;
2329   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2330   PetscReal      atmp;
2331   PetscScalar    *x,zero = 0.0;
2332   MatScalar      *aa;
2333   PetscInt       ncols,brow,krow,kcol;
2334 
2335   PetscFunctionBegin;
2336   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2337   bs  = A->rmap->bs;
2338   aa  = a->a;
2339   ai  = a->i;
2340   aj  = a->j;
2341   mbs = a->mbs;
2342 
2343   ierr = VecSet(v,zero);CHKERRQ(ierr);
2344   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2345   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2346   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2347   for (i=0; i<mbs; i++) {
2348     ncols = ai[1] - ai[0]; ai++;
2349     brow  = bs*i;
2350     for (j=0; j<ncols; j++) {
2351       for (kcol=0; kcol<bs; kcol++) {
2352         for (krow=0; krow<bs; krow++) {
2353           atmp = PetscAbsScalar(*aa);aa++;
2354           row  = brow + krow;   /* row index */
2355           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2356         }
2357       }
2358       aj++;
2359     }
2360   }
2361   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2362   PetscFunctionReturn(0);
2363 }
2364 
2365 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2366 {
2367   PetscErrorCode ierr;
2368 
2369   PetscFunctionBegin;
2370   /* If the two matrices have the same copy implementation, use fast copy. */
2371   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2372     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2373     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2374     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2375 
2376     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2377     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2378     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2379     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2380   } else {
2381     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2382   }
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2387 {
2388   PetscErrorCode ierr;
2389 
2390   PetscFunctionBegin;
2391   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2392   PetscFunctionReturn(0);
2393 }
2394 
2395 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2396 {
2397   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2398 
2399   PetscFunctionBegin;
2400   *array = a->a;
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2405 {
2406   PetscFunctionBegin;
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2411 {
2412   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2413   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2414   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2415   PetscErrorCode ierr;
2416 
2417   PetscFunctionBegin;
2418   /* Set the number of nonzeros in the new matrix */
2419   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2420   PetscFunctionReturn(0);
2421 }
2422 
2423 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2424 {
2425   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2426   PetscErrorCode ierr;
2427   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2428   PetscBLASInt   one=1;
2429 
2430   PetscFunctionBegin;
2431   if (str == SAME_NONZERO_PATTERN) {
2432     PetscScalar  alpha = a;
2433     PetscBLASInt bnz;
2434     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2435     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2436     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2437   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2438     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2439   } else {
2440     Mat      B;
2441     PetscInt *nnz;
2442     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2443     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2444     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2445     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2446     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2447     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2448     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2449     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2450     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2451     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2452     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2453     ierr = PetscFree(nnz);CHKERRQ(ierr);
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2459 {
2460   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2461   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2462   MatScalar   *aa = a->a;
2463 
2464   PetscFunctionBegin;
2465   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2466   PetscFunctionReturn(0);
2467 }
2468 
2469 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2470 {
2471   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2472   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2473   MatScalar   *aa = a->a;
2474 
2475   PetscFunctionBegin;
2476   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2477   PetscFunctionReturn(0);
2478 }
2479 
2480 /*
2481     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2482 */
2483 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2484 {
2485   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2486   PetscErrorCode ierr;
2487   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2488   PetscInt       nz = a->i[m],row,*jj,mr,col;
2489 
2490   PetscFunctionBegin;
2491   *nn = n;
2492   if (!ia) PetscFunctionReturn(0);
2493   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2494   else {
2495     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2496     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2497     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2498     jj   = a->j;
2499     for (i=0; i<nz; i++) {
2500       collengths[jj[i]]++;
2501     }
2502     cia[0] = oshift;
2503     for (i=0; i<n; i++) {
2504       cia[i+1] = cia[i] + collengths[i];
2505     }
2506     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2507     jj   = a->j;
2508     for (row=0; row<m; row++) {
2509       mr = a->i[row+1] - a->i[row];
2510       for (i=0; i<mr; i++) {
2511         col = *jj++;
2512 
2513         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2514       }
2515     }
2516     ierr = PetscFree(collengths);CHKERRQ(ierr);
2517     *ia  = cia; *ja = cja;
2518   }
2519   PetscFunctionReturn(0);
2520 }
2521 
2522 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2523 {
2524   PetscErrorCode ierr;
2525 
2526   PetscFunctionBegin;
2527   if (!ia) PetscFunctionReturn(0);
2528   ierr = PetscFree(*ia);CHKERRQ(ierr);
2529   ierr = PetscFree(*ja);CHKERRQ(ierr);
2530   PetscFunctionReturn(0);
2531 }
2532 
2533 /*
2534  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2535  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2536  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2537  */
2538 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2539 {
2540   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2541   PetscErrorCode ierr;
2542   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2543   PetscInt       nz = a->i[m],row,*jj,mr,col;
2544   PetscInt       *cspidx;
2545 
2546   PetscFunctionBegin;
2547   *nn = n;
2548   if (!ia) PetscFunctionReturn(0);
2549 
2550   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2551   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2552   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2553   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2554   jj   = a->j;
2555   for (i=0; i<nz; i++) {
2556     collengths[jj[i]]++;
2557   }
2558   cia[0] = oshift;
2559   for (i=0; i<n; i++) {
2560     cia[i+1] = cia[i] + collengths[i];
2561   }
2562   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2563   jj   = a->j;
2564   for (row=0; row<m; row++) {
2565     mr = a->i[row+1] - a->i[row];
2566     for (i=0; i<mr; i++) {
2567       col = *jj++;
2568       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2569       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2570     }
2571   }
2572   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2573   *ia    = cia;
2574   *ja    = cja;
2575   *spidx = cspidx;
2576   PetscFunctionReturn(0);
2577 }
2578 
2579 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2580 {
2581   PetscErrorCode ierr;
2582 
2583   PetscFunctionBegin;
2584   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2585   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2590 {
2591   PetscErrorCode ierr;
2592   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2593 
2594   PetscFunctionBegin;
2595   if (!Y->preallocated || !aij->nz) {
2596     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2597   }
2598   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2599   PetscFunctionReturn(0);
2600 }
2601 
2602 /* -------------------------------------------------------------------*/
2603 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2604                                        MatGetRow_SeqBAIJ,
2605                                        MatRestoreRow_SeqBAIJ,
2606                                        MatMult_SeqBAIJ_N,
2607                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2608                                        MatMultTranspose_SeqBAIJ,
2609                                        MatMultTransposeAdd_SeqBAIJ,
2610                                        0,
2611                                        0,
2612                                        0,
2613                                /* 10*/ 0,
2614                                        MatLUFactor_SeqBAIJ,
2615                                        0,
2616                                        0,
2617                                        MatTranspose_SeqBAIJ,
2618                                /* 15*/ MatGetInfo_SeqBAIJ,
2619                                        MatEqual_SeqBAIJ,
2620                                        MatGetDiagonal_SeqBAIJ,
2621                                        MatDiagonalScale_SeqBAIJ,
2622                                        MatNorm_SeqBAIJ,
2623                                /* 20*/ 0,
2624                                        MatAssemblyEnd_SeqBAIJ,
2625                                        MatSetOption_SeqBAIJ,
2626                                        MatZeroEntries_SeqBAIJ,
2627                                /* 24*/ MatZeroRows_SeqBAIJ,
2628                                        0,
2629                                        0,
2630                                        0,
2631                                        0,
2632                                /* 29*/ MatSetUp_SeqBAIJ,
2633                                        0,
2634                                        0,
2635                                        0,
2636                                        0,
2637                                /* 34*/ MatDuplicate_SeqBAIJ,
2638                                        0,
2639                                        0,
2640                                        MatILUFactor_SeqBAIJ,
2641                                        0,
2642                                /* 39*/ MatAXPY_SeqBAIJ,
2643                                        MatCreateSubMatrices_SeqBAIJ,
2644                                        MatIncreaseOverlap_SeqBAIJ,
2645                                        MatGetValues_SeqBAIJ,
2646                                        MatCopy_SeqBAIJ,
2647                                /* 44*/ 0,
2648                                        MatScale_SeqBAIJ,
2649                                        MatShift_SeqBAIJ,
2650                                        0,
2651                                        MatZeroRowsColumns_SeqBAIJ,
2652                                /* 49*/ 0,
2653                                        MatGetRowIJ_SeqBAIJ,
2654                                        MatRestoreRowIJ_SeqBAIJ,
2655                                        MatGetColumnIJ_SeqBAIJ,
2656                                        MatRestoreColumnIJ_SeqBAIJ,
2657                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2658                                        0,
2659                                        0,
2660                                        0,
2661                                        MatSetValuesBlocked_SeqBAIJ,
2662                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2663                                        MatDestroy_SeqBAIJ,
2664                                        MatView_SeqBAIJ,
2665                                        0,
2666                                        0,
2667                                /* 64*/ 0,
2668                                        0,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2673                                        0,
2674                                        MatConvert_Basic,
2675                                        0,
2676                                        0,
2677                                /* 74*/ 0,
2678                                        MatFDColoringApply_BAIJ,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                /* 79*/ 0,
2683                                        0,
2684                                        0,
2685                                        0,
2686                                        MatLoad_SeqBAIJ,
2687                                /* 84*/ 0,
2688                                        0,
2689                                        0,
2690                                        0,
2691                                        0,
2692                                /* 89*/ 0,
2693                                        0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                /* 94*/ 0,
2698                                        0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                /* 99*/ 0,
2703                                        0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                /*104*/ 0,
2708                                        MatRealPart_SeqBAIJ,
2709                                        MatImaginaryPart_SeqBAIJ,
2710                                        0,
2711                                        0,
2712                                /*109*/ 0,
2713                                        0,
2714                                        0,
2715                                        0,
2716                                        MatMissingDiagonal_SeqBAIJ,
2717                                /*114*/ 0,
2718                                        0,
2719                                        0,
2720                                        0,
2721                                        0,
2722                                /*119*/ 0,
2723                                        0,
2724                                        MatMultHermitianTranspose_SeqBAIJ,
2725                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2726                                        0,
2727                                /*124*/ 0,
2728                                        0,
2729                                        MatInvertBlockDiagonal_SeqBAIJ,
2730                                        0,
2731                                        0,
2732                                /*129*/ 0,
2733                                        0,
2734                                        0,
2735                                        0,
2736                                        0,
2737                                /*134*/ 0,
2738                                        0,
2739                                        0,
2740                                        0,
2741                                        0,
2742                                /*139*/ MatSetBlockSizes_Default,
2743                                        0,
2744                                        0,
2745                                        MatFDColoringSetUp_SeqXAIJ,
2746                                        0,
2747                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2748                                        MatDestroySubMatrices_SeqBAIJ
2749 };
2750 
2751 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2752 {
2753   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2754   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2755   PetscErrorCode ierr;
2756 
2757   PetscFunctionBegin;
2758   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2759 
2760   /* allocate space for values if not already there */
2761   if (!aij->saved_values) {
2762     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2763     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2764   }
2765 
2766   /* copy values over */
2767   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2768   PetscFunctionReturn(0);
2769 }
2770 
2771 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2772 {
2773   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2774   PetscErrorCode ierr;
2775   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2776 
2777   PetscFunctionBegin;
2778   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2779   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2780 
2781   /* copy values over */
2782   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2783   PetscFunctionReturn(0);
2784 }
2785 
2786 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2787 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2788 
2789 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2790 {
2791   Mat_SeqBAIJ    *b;
2792   PetscErrorCode ierr;
2793   PetscInt       i,mbs,nbs,bs2;
2794   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2795 
2796   PetscFunctionBegin;
2797   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2798   if (nz == MAT_SKIP_ALLOCATION) {
2799     skipallocation = PETSC_TRUE;
2800     nz             = 0;
2801   }
2802 
2803   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2804   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2805   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2806   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2807 
2808   B->preallocated = PETSC_TRUE;
2809 
2810   mbs = B->rmap->n/bs;
2811   nbs = B->cmap->n/bs;
2812   bs2 = bs*bs;
2813 
2814   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2815 
2816   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2817   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2818   if (nnz) {
2819     for (i=0; i<mbs; i++) {
2820       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2821       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2822     }
2823   }
2824 
2825   b    = (Mat_SeqBAIJ*)B->data;
2826   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2827   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2828   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2829 
2830   if (!flg) {
2831     switch (bs) {
2832     case 1:
2833       B->ops->mult    = MatMult_SeqBAIJ_1;
2834       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2835       break;
2836     case 2:
2837       B->ops->mult    = MatMult_SeqBAIJ_2;
2838       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2839       break;
2840     case 3:
2841       B->ops->mult    = MatMult_SeqBAIJ_3;
2842       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2843       break;
2844     case 4:
2845       B->ops->mult    = MatMult_SeqBAIJ_4;
2846       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2847       break;
2848     case 5:
2849       B->ops->mult    = MatMult_SeqBAIJ_5;
2850       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2851       break;
2852     case 6:
2853       B->ops->mult    = MatMult_SeqBAIJ_6;
2854       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2855       break;
2856     case 7:
2857       B->ops->mult    = MatMult_SeqBAIJ_7;
2858       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2859       break;
2860     case 9:
2861 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2862       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2863       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2864 #else
2865       B->ops->mult    = MatMult_SeqBAIJ_N;
2866       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2867 #endif
2868       break;
2869     case 11:
2870       B->ops->mult    = MatMult_SeqBAIJ_11;
2871       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2872       break;
2873     case 15:
2874       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2875       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2876       break;
2877     default:
2878       B->ops->mult    = MatMult_SeqBAIJ_N;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2880       break;
2881     }
2882   }
2883   B->ops->sor = MatSOR_SeqBAIJ;
2884   b->mbs = mbs;
2885   b->nbs = nbs;
2886   if (!skipallocation) {
2887     if (!b->imax) {
2888       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2889       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2890 
2891       b->free_imax_ilen = PETSC_TRUE;
2892     }
2893     /* b->ilen will count nonzeros in each block row so far. */
2894     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2895     if (!nnz) {
2896       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2897       else if (nz < 0) nz = 1;
2898       nz = PetscMin(nz,nbs);
2899       for (i=0; i<mbs; i++) b->imax[i] = nz;
2900       nz = nz*mbs;
2901     } else {
2902       PetscInt64 nz64 = 0;
2903       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2904       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2905     }
2906 
2907     /* allocate the matrix space */
2908     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2909     if (B->structure_only) {
2910       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2911       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2912       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2913     } else {
2914       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2915       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2916       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2917     }
2918     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2919 
2920     if (B->structure_only) {
2921       b->singlemalloc = PETSC_FALSE;
2922       b->free_a       = PETSC_FALSE;
2923     } else {
2924       b->singlemalloc = PETSC_TRUE;
2925       b->free_a       = PETSC_TRUE;
2926     }
2927     b->free_ij = PETSC_TRUE;
2928 
2929     b->i[0] = 0;
2930     for (i=1; i<mbs+1; i++) {
2931       b->i[i] = b->i[i-1] + b->imax[i-1];
2932     }
2933 
2934   } else {
2935     b->free_a  = PETSC_FALSE;
2936     b->free_ij = PETSC_FALSE;
2937   }
2938 
2939   b->bs2              = bs2;
2940   b->mbs              = mbs;
2941   b->nz               = 0;
2942   b->maxnz            = nz;
2943   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2944   B->was_assembled    = PETSC_FALSE;
2945   B->assembled        = PETSC_FALSE;
2946   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2947   PetscFunctionReturn(0);
2948 }
2949 
2950 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2951 {
2952   PetscInt       i,m,nz,nz_max=0,*nnz;
2953   PetscScalar    *values=0;
2954   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2955   PetscErrorCode ierr;
2956 
2957   PetscFunctionBegin;
2958   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2959   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2960   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2961   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2962   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2963   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2964   m    = B->rmap->n/bs;
2965 
2966   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2967   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2968   for (i=0; i<m; i++) {
2969     nz = ii[i+1]- ii[i];
2970     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2971     nz_max = PetscMax(nz_max, nz);
2972     nnz[i] = nz;
2973   }
2974   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2975   ierr = PetscFree(nnz);CHKERRQ(ierr);
2976 
2977   values = (PetscScalar*)V;
2978   if (!values) {
2979     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2980   }
2981   for (i=0; i<m; i++) {
2982     PetscInt          ncols  = ii[i+1] - ii[i];
2983     const PetscInt    *icols = jj + ii[i];
2984     if (bs == 1 || !roworiented) {
2985       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2986       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2987     } else {
2988       PetscInt j;
2989       for (j=0; j<ncols; j++) {
2990         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2991         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2992       }
2993     }
2994   }
2995   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2996   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2997   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2998   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2999   PetscFunctionReturn(0);
3000 }
3001 
3002 /*MC
3003    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3004    block sparse compressed row format.
3005 
3006    Options Database Keys:
3007 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3008 
3009    Level: beginner
3010 
3011    Notes:
3012     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3013     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3014 
3015 .seealso: MatCreateSeqBAIJ()
3016 M*/
3017 
3018 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3019 
3020 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3021 {
3022   PetscErrorCode ierr;
3023   PetscMPIInt    size;
3024   Mat_SeqBAIJ    *b;
3025 
3026   PetscFunctionBegin;
3027   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3028   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3029 
3030   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3031   B->data = (void*)b;
3032   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3033 
3034   b->row          = 0;
3035   b->col          = 0;
3036   b->icol         = 0;
3037   b->reallocs     = 0;
3038   b->saved_values = 0;
3039 
3040   b->roworiented        = PETSC_TRUE;
3041   b->nonew              = 0;
3042   b->diag               = 0;
3043   B->spptr              = 0;
3044   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3045   b->keepnonzeropattern = PETSC_FALSE;
3046 
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3049   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3051   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3056 #if defined(PETSC_HAVE_HYPRE)
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3058 #endif
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3062   PetscFunctionReturn(0);
3063 }
3064 
3065 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3066 {
3067   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3068   PetscErrorCode ierr;
3069   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3070 
3071   PetscFunctionBegin;
3072   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3073 
3074   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3075     c->imax           = a->imax;
3076     c->ilen           = a->ilen;
3077     c->free_imax_ilen = PETSC_FALSE;
3078   } else {
3079     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3080     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3081     for (i=0; i<mbs; i++) {
3082       c->imax[i] = a->imax[i];
3083       c->ilen[i] = a->ilen[i];
3084     }
3085     c->free_imax_ilen = PETSC_TRUE;
3086   }
3087 
3088   /* allocate the matrix space */
3089   if (mallocmatspace) {
3090     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3091       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3092       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3093 
3094       c->i            = a->i;
3095       c->j            = a->j;
3096       c->singlemalloc = PETSC_FALSE;
3097       c->free_a       = PETSC_TRUE;
3098       c->free_ij      = PETSC_FALSE;
3099       c->parent       = A;
3100       C->preallocated = PETSC_TRUE;
3101       C->assembled    = PETSC_TRUE;
3102 
3103       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3104       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3105       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3106     } else {
3107       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3108       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3109 
3110       c->singlemalloc = PETSC_TRUE;
3111       c->free_a       = PETSC_TRUE;
3112       c->free_ij      = PETSC_TRUE;
3113 
3114       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3115       if (mbs > 0) {
3116         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3117         if (cpvalues == MAT_COPY_VALUES) {
3118           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3119         } else {
3120           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3121         }
3122       }
3123       C->preallocated = PETSC_TRUE;
3124       C->assembled    = PETSC_TRUE;
3125     }
3126   }
3127 
3128   c->roworiented = a->roworiented;
3129   c->nonew       = a->nonew;
3130 
3131   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3132   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3133 
3134   c->bs2         = a->bs2;
3135   c->mbs         = a->mbs;
3136   c->nbs         = a->nbs;
3137 
3138   if (a->diag) {
3139     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3140       c->diag      = a->diag;
3141       c->free_diag = PETSC_FALSE;
3142     } else {
3143       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3144       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3145       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3146       c->free_diag = PETSC_TRUE;
3147     }
3148   } else c->diag = 0;
3149 
3150   c->nz         = a->nz;
3151   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3152   c->solve_work = NULL;
3153   c->mult_work  = NULL;
3154   c->sor_workt  = NULL;
3155   c->sor_work   = NULL;
3156 
3157   c->compressedrow.use   = a->compressedrow.use;
3158   c->compressedrow.nrows = a->compressedrow.nrows;
3159   if (a->compressedrow.use) {
3160     i    = a->compressedrow.nrows;
3161     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3162     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3163     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3164     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3165   } else {
3166     c->compressedrow.use    = PETSC_FALSE;
3167     c->compressedrow.i      = NULL;
3168     c->compressedrow.rindex = NULL;
3169   }
3170   C->nonzerostate = A->nonzerostate;
3171 
3172   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3173   PetscFunctionReturn(0);
3174 }
3175 
3176 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3177 {
3178   PetscErrorCode ierr;
3179 
3180   PetscFunctionBegin;
3181   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3182   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3183   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3184   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3185   PetscFunctionReturn(0);
3186 }
3187 
3188 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3189 {
3190   Mat_SeqBAIJ    *a;
3191   PetscErrorCode ierr;
3192   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3193   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3194   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3195   PetscInt       *masked,nmask,tmp,bs2,ishift;
3196   PetscMPIInt    size;
3197   int            fd;
3198   PetscScalar    *aa;
3199   MPI_Comm       comm;
3200   PetscBool      isbinary;
3201 
3202   PetscFunctionBegin;
3203   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3204   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3205 
3206   /* force binary viewer to load .info file if it has not yet done so */
3207   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3208   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3209   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3210   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3211   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3212   if (bs < 0) bs = 1;
3213   bs2  = bs*bs;
3214 
3215   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3216   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3217   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3218   ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3219   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3220   M = header[1]; N = header[2]; nz = header[3];
3221 
3222   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3223   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3224 
3225   /*
3226      This code adds extra rows to make sure the number of rows is
3227     divisible by the blocksize
3228   */
3229   mbs        = M/bs;
3230   extra_rows = bs - M + bs*(mbs);
3231   if (extra_rows == bs) extra_rows = 0;
3232   else mbs++;
3233   if (extra_rows) {
3234     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3235   }
3236 
3237   /* Set global sizes if not already set */
3238   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3239     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3240   } else { /* Check if the matrix global sizes are correct */
3241     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3242     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3243       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3244     }
3245     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3246   }
3247 
3248   /* read in row lengths */
3249   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3250   ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
3251   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3252 
3253   /* read in column indices */
3254   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3255   ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3256   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3257 
3258   /* loop over row lengths determining block row lengths */
3259   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3260   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3261   ierr     = PetscArrayzero(mask,mbs);CHKERRQ(ierr);
3262   rowcount = 0;
3263   nzcount  = 0;
3264   for (i=0; i<mbs; i++) {
3265     nmask = 0;
3266     for (j=0; j<bs; j++) {
3267       kmax = rowlengths[rowcount];
3268       for (k=0; k<kmax; k++) {
3269         tmp = jj[nzcount++]/bs;
3270         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3271       }
3272       rowcount++;
3273     }
3274     browlengths[i] += nmask;
3275     /* zero out the mask elements we set */
3276     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3277   }
3278 
3279   /* Do preallocation  */
3280   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3281   a    = (Mat_SeqBAIJ*)newmat->data;
3282 
3283   /* set matrix "i" values */
3284   a->i[0] = 0;
3285   for (i=1; i<= mbs; i++) {
3286     a->i[i]      = a->i[i-1] + browlengths[i-1];
3287     a->ilen[i-1] = browlengths[i-1];
3288   }
3289   a->nz = 0;
3290   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3291 
3292   /* read in nonzero values */
3293   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3294   ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3295   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3296 
3297   /* set "a" and "j" values into matrix */
3298   nzcount = 0; jcount = 0;
3299   for (i=0; i<mbs; i++) {
3300     nzcountb = nzcount;
3301     nmask    = 0;
3302     for (j=0; j<bs; j++) {
3303       kmax = rowlengths[i*bs+j];
3304       for (k=0; k<kmax; k++) {
3305         tmp = jj[nzcount++]/bs;
3306         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3307       }
3308     }
3309     /* sort the masked values */
3310     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3311 
3312     /* set "j" values into matrix */
3313     maskcount = 1;
3314     for (j=0; j<nmask; j++) {
3315       a->j[jcount++]  = masked[j];
3316       mask[masked[j]] = maskcount++;
3317     }
3318     /* set "a" values into matrix */
3319     ishift = bs2*a->i[i];
3320     for (j=0; j<bs; j++) {
3321       kmax = rowlengths[i*bs+j];
3322       for (k=0; k<kmax; k++) {
3323         tmp       = jj[nzcountb]/bs;
3324         block     = mask[tmp] - 1;
3325         point     = jj[nzcountb] - bs*tmp;
3326         idx       = ishift + bs2*block + j + bs*point;
3327         a->a[idx] = (MatScalar)aa[nzcountb++];
3328       }
3329     }
3330     /* zero out the mask elements we set */
3331     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3332   }
3333   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3334 
3335   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3336   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3337   ierr = PetscFree(aa);CHKERRQ(ierr);
3338   ierr = PetscFree(jj);CHKERRQ(ierr);
3339   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3340 
3341   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3342   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3343   PetscFunctionReturn(0);
3344 }
3345 
3346 /*@C
3347    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3348    compressed row) format.  For good matrix assembly performance the
3349    user should preallocate the matrix storage by setting the parameter nz
3350    (or the array nnz).  By setting these parameters accurately, performance
3351    during matrix assembly can be increased by more than a factor of 50.
3352 
3353    Collective
3354 
3355    Input Parameters:
3356 +  comm - MPI communicator, set to PETSC_COMM_SELF
3357 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3358           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3359 .  m - number of rows
3360 .  n - number of columns
3361 .  nz - number of nonzero blocks  per block row (same for all rows)
3362 -  nnz - array containing the number of nonzero blocks in the various block rows
3363          (possibly different for each block row) or NULL
3364 
3365    Output Parameter:
3366 .  A - the matrix
3367 
3368    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3369    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3370    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3371 
3372    Options Database Keys:
3373 +   -mat_no_unroll - uses code that does not unroll the loops in the
3374                      block calculations (much slower)
3375 -    -mat_block_size - size of the blocks to use
3376 
3377    Level: intermediate
3378 
3379    Notes:
3380    The number of rows and columns must be divisible by blocksize.
3381 
3382    If the nnz parameter is given then the nz parameter is ignored
3383 
3384    A nonzero block is any block that as 1 or more nonzeros in it
3385 
3386    The block AIJ format is fully compatible with standard Fortran 77
3387    storage.  That is, the stored row and column indices can begin at
3388    either one (as in Fortran) or zero.  See the users' manual for details.
3389 
3390    Specify the preallocated storage with either nz or nnz (not both).
3391    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3392    allocation.  See Users-Manual: ch_mat for details.
3393    matrices.
3394 
3395 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3396 @*/
3397 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3398 {
3399   PetscErrorCode ierr;
3400 
3401   PetscFunctionBegin;
3402   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3403   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3404   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3405   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3406   PetscFunctionReturn(0);
3407 }
3408 
3409 /*@C
3410    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3411    per row in the matrix. For good matrix assembly performance the
3412    user should preallocate the matrix storage by setting the parameter nz
3413    (or the array nnz).  By setting these parameters accurately, performance
3414    during matrix assembly can be increased by more than a factor of 50.
3415 
3416    Collective
3417 
3418    Input Parameters:
3419 +  B - the matrix
3420 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3421           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3422 .  nz - number of block nonzeros per block row (same for all rows)
3423 -  nnz - array containing the number of block nonzeros in the various block rows
3424          (possibly different for each block row) or NULL
3425 
3426    Options Database Keys:
3427 +   -mat_no_unroll - uses code that does not unroll the loops in the
3428                      block calculations (much slower)
3429 -   -mat_block_size - size of the blocks to use
3430 
3431    Level: intermediate
3432 
3433    Notes:
3434    If the nnz parameter is given then the nz parameter is ignored
3435 
3436    You can call MatGetInfo() to get information on how effective the preallocation was;
3437    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3438    You can also run with the option -info and look for messages with the string
3439    malloc in them to see if additional memory allocation was needed.
3440 
3441    The block AIJ format is fully compatible with standard Fortran 77
3442    storage.  That is, the stored row and column indices can begin at
3443    either one (as in Fortran) or zero.  See the users' manual for details.
3444 
3445    Specify the preallocated storage with either nz or nnz (not both).
3446    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3447    allocation.  See Users-Manual: ch_mat for details.
3448 
3449 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3450 @*/
3451 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3452 {
3453   PetscErrorCode ierr;
3454 
3455   PetscFunctionBegin;
3456   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3457   PetscValidType(B,1);
3458   PetscValidLogicalCollectiveInt(B,bs,2);
3459   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3460   PetscFunctionReturn(0);
3461 }
3462 
3463 /*@C
3464    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3465 
3466    Collective
3467 
3468    Input Parameters:
3469 +  B - the matrix
3470 .  i - the indices into j for the start of each local row (starts with zero)
3471 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3472 -  v - optional values in the matrix
3473 
3474    Level: advanced
3475 
3476    Notes:
3477    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3478    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3479    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3480    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3481    block column and the second index is over columns within a block.
3482 
3483    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3484 
3485 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3486 @*/
3487 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3488 {
3489   PetscErrorCode ierr;
3490 
3491   PetscFunctionBegin;
3492   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3493   PetscValidType(B,1);
3494   PetscValidLogicalCollectiveInt(B,bs,2);
3495   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3496   PetscFunctionReturn(0);
3497 }
3498 
3499 
3500 /*@
3501      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3502 
3503      Collective
3504 
3505    Input Parameters:
3506 +  comm - must be an MPI communicator of size 1
3507 .  bs - size of block
3508 .  m - number of rows
3509 .  n - number of columns
3510 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3511 .  j - column indices
3512 -  a - matrix values
3513 
3514    Output Parameter:
3515 .  mat - the matrix
3516 
3517    Level: advanced
3518 
3519    Notes:
3520        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3521     once the matrix is destroyed
3522 
3523        You cannot set new nonzero locations into this matrix, that will generate an error.
3524 
3525        The i and j indices are 0 based
3526 
3527        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3528 
3529       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3530       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3531       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3532       with column-major ordering within blocks.
3533 
3534 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3535 
3536 @*/
3537 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3538 {
3539   PetscErrorCode ierr;
3540   PetscInt       ii;
3541   Mat_SeqBAIJ    *baij;
3542 
3543   PetscFunctionBegin;
3544   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3545   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3546 
3547   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3548   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3549   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3550   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3551   baij = (Mat_SeqBAIJ*)(*mat)->data;
3552   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3553   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3554 
3555   baij->i = i;
3556   baij->j = j;
3557   baij->a = a;
3558 
3559   baij->singlemalloc = PETSC_FALSE;
3560   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3561   baij->free_a       = PETSC_FALSE;
3562   baij->free_ij      = PETSC_FALSE;
3563 
3564   for (ii=0; ii<m; ii++) {
3565     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3566 #if defined(PETSC_USE_DEBUG)
3567     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3568 #endif
3569   }
3570 #if defined(PETSC_USE_DEBUG)
3571   for (ii=0; ii<baij->i[m]; ii++) {
3572     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3573     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3574   }
3575 #endif
3576 
3577   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3578   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3579   PetscFunctionReturn(0);
3580 }
3581 
3582 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3583 {
3584   PetscErrorCode ierr;
3585   PetscMPIInt    size;
3586 
3587   PetscFunctionBegin;
3588   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3589   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3590     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3591   } else {
3592     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3593   }
3594   PetscFunctionReturn(0);
3595 }
3596