xref: /petsc/src/mat/impls/baij/seq/baij.c (revision 5a856986583887c326abe5dfd149e8184a29cd80)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912   PetscErrorCode    ierr;
913 
914   PetscFunctionBegin;
915   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
916   stepval = (n-1)*4;
917   for (k=0; k<m; k++) { /* loop over added rows */
918     row  = im[k];
919     rp   = aj + ai[row];
920     ap   = aa + 16*ai[row];
921     nrow = ailen[row];
922     low  = 0;
923     high = nrow;
924     for (l=0; l<n; l++) { /* loop over added columns */
925       col = in[l];
926       if (col <= lastcol)  low = 0;
927       else                high = nrow;
928       lastcol = col;
929       value   = v + k*(stepval+4 + l)*4;
930       while (high-low > 7) {
931         t = (low+high)/2;
932         if (rp[t] > col) high = t;
933         else             low  = t;
934       }
935       for (i=low; i<high; i++) {
936         if (rp[i] > col) break;
937         if (rp[i] == col) {
938           bap = ap +  16*i;
939           for (ii=0; ii<4; ii++,value+=stepval) {
940             for (jj=ii; jj<16; jj+=4) {
941               bap[jj] += *value++;
942             }
943           }
944           goto noinsert2;
945         }
946       }
947       N = nrow++ - 1;
948       high++; /* added new column index thus must search to one higher than before */
949       /* shift up all the later entries in this row */
950       for (ii=N; ii>=i; ii--) {
951         rp[ii+1] = rp[ii];
952         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
953       }
954       if (N >= i) {
955         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
956       }
957       rp[i] = col;
958       bap   = ap +  16*i;
959       for (ii=0; ii<4; ii++,value+=stepval) {
960         for (jj=ii; jj<16; jj+=4) {
961           bap[jj] = *value++;
962         }
963       }
964       noinsert2:;
965       low = i;
966     }
967     ailen[row] = nrow;
968   }
969   PetscFunctionReturnVoid();
970 }
971 
972 #if defined(PETSC_HAVE_FORTRAN_CAPS)
973 #define matsetvalues4_ MATSETVALUES4
974 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
975 #define matsetvalues4_ matsetvalues4
976 #endif
977 
978 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
979 {
980   Mat         A  = *AA;
981   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
982   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
983   PetscInt    *ai=a->i,*ailen=a->ilen;
984   PetscInt    *aj=a->j,brow,bcol;
985   PetscInt    ridx,cidx,lastcol = -1;
986   MatScalar   *ap,value,*aa=a->a,*bap;
987   PetscErrorCode ierr;
988 
989   PetscFunctionBegin;
990   for (k=0; k<m; k++) { /* loop over added rows */
991     row  = im[k]; brow = row/4;
992     rp   = aj + ai[brow];
993     ap   = aa + 16*ai[brow];
994     nrow = ailen[brow];
995     low  = 0;
996     high = nrow;
997     for (l=0; l<n; l++) { /* loop over added columns */
998       col   = in[l]; bcol = col/4;
999       ridx  = row % 4; cidx = col % 4;
1000       value = v[l + k*n];
1001       if (col <= lastcol)  low = 0;
1002       else                high = nrow;
1003       lastcol = col;
1004       while (high-low > 7) {
1005         t = (low+high)/2;
1006         if (rp[t] > bcol) high = t;
1007         else              low  = t;
1008       }
1009       for (i=low; i<high; i++) {
1010         if (rp[i] > bcol) break;
1011         if (rp[i] == bcol) {
1012           bap   = ap +  16*i + 4*cidx + ridx;
1013           *bap += value;
1014           goto noinsert1;
1015         }
1016       }
1017       N = nrow++ - 1;
1018       high++; /* added new column thus must search to one higher than before */
1019       /* shift up all the later entries in this row */
1020       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1021       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1022       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1023       rp[i]                    = bcol;
1024       ap[16*i + 4*cidx + ridx] = value;
1025 noinsert1:;
1026       low = i;
1027     }
1028     ailen[brow] = nrow;
1029   }
1030   PetscFunctionReturnVoid();
1031 }
1032 
1033 /*
1034      Checks for missing diagonals
1035 */
1036 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1037 {
1038   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040   PetscInt       *diag,*ii = a->i,i;
1041 
1042   PetscFunctionBegin;
1043   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1044   *missing = PETSC_FALSE;
1045   if (A->rmap->n > 0 && !ii) {
1046     *missing = PETSC_TRUE;
1047     if (d) *d = 0;
1048     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1049   } else {
1050     diag = a->diag;
1051     for (i=0; i<a->mbs; i++) {
1052       if (diag[i] >= ii[i+1]) {
1053         *missing = PETSC_TRUE;
1054         if (d) *d = i;
1055         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1056         break;
1057       }
1058     }
1059   }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 
1088 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1089 {
1090   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1091   PetscErrorCode ierr;
1092   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1093   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1094 
1095   PetscFunctionBegin;
1096   *nn = n;
1097   if (!ia) PetscFunctionReturn(0);
1098   if (symmetric) {
1099     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1100     nz   = tia[n];
1101   } else {
1102     tia = a->i; tja = a->j;
1103   }
1104 
1105   if (!blockcompressed && bs > 1) {
1106     (*nn) *= bs;
1107     /* malloc & create the natural set of indices */
1108     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1109     if (n) {
1110       (*ia)[0] = oshift;
1111       for (j=1; j<bs; j++) {
1112         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1113       }
1114     }
1115 
1116     for (i=1; i<n; i++) {
1117       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1118       for (j=1; j<bs; j++) {
1119         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1120       }
1121     }
1122     if (n) {
1123       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1124     }
1125 
1126     if (inja) {
1127       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1128       cnt = 0;
1129       for (i=0; i<n; i++) {
1130         for (j=0; j<bs; j++) {
1131           for (k=tia[i]; k<tia[i+1]; k++) {
1132             for (l=0; l<bs; l++) {
1133               (*ja)[cnt++] = bs*tja[k] + l;
1134             }
1135           }
1136         }
1137       }
1138     }
1139 
1140     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1141       ierr = PetscFree(tia);CHKERRQ(ierr);
1142       ierr = PetscFree(tja);CHKERRQ(ierr);
1143     }
1144   } else if (oshift == 1) {
1145     if (symmetric) {
1146       nz = tia[A->rmap->n/bs];
1147       /*  add 1 to i and j indices */
1148       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1149       *ia = tia;
1150       if (ja) {
1151         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1152         *ja = tja;
1153       }
1154     } else {
1155       nz = a->i[A->rmap->n/bs];
1156       /* malloc space and  add 1 to i and j indices */
1157       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1158       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1159       if (ja) {
1160         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1161         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1162       }
1163     }
1164   } else {
1165     *ia = tia;
1166     if (ja) *ja = tja;
1167   }
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1172 {
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   if (!ia) PetscFunctionReturn(0);
1177   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1178     ierr = PetscFree(*ia);CHKERRQ(ierr);
1179     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1180   }
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1185 {
1186   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190 #if defined(PETSC_USE_LOG)
1191   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1192 #endif
1193   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1194   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1195   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1196   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1197   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1198   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1200   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1201   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1202   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1203   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1204   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1205   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1206 
1207   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1208   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1209   ierr = PetscFree(A->data);CHKERRQ(ierr);
1210 
1211   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1222 #if defined(PETSC_HAVE_HYPRE)
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1224 #endif
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1231 {
1232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   switch (op) {
1237   case MAT_ROW_ORIENTED:
1238     a->roworiented = flg;
1239     break;
1240   case MAT_KEEP_NONZERO_PATTERN:
1241     a->keepnonzeropattern = flg;
1242     break;
1243   case MAT_NEW_NONZERO_LOCATIONS:
1244     a->nonew = (flg ? 0 : 1);
1245     break;
1246   case MAT_NEW_NONZERO_LOCATION_ERR:
1247     a->nonew = (flg ? -1 : 0);
1248     break;
1249   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1250     a->nonew = (flg ? -2 : 0);
1251     break;
1252   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1253     a->nounused = (flg ? -1 : 0);
1254     break;
1255   case MAT_NEW_DIAGONALS:
1256   case MAT_IGNORE_OFF_PROC_ENTRIES:
1257   case MAT_USE_HASH_TABLE:
1258   case MAT_SORTED_FULL:
1259     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1260     break;
1261   case MAT_SPD:
1262   case MAT_SYMMETRIC:
1263   case MAT_STRUCTURALLY_SYMMETRIC:
1264   case MAT_HERMITIAN:
1265   case MAT_SYMMETRY_ETERNAL:
1266   case MAT_SUBMAT_SINGLEIS:
1267   case MAT_STRUCTURE_ONLY:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1277 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1278 {
1279   PetscErrorCode ierr;
1280   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1281   MatScalar      *aa_i;
1282   PetscScalar    *v_i;
1283 
1284   PetscFunctionBegin;
1285   bs  = A->rmap->bs;
1286   bs2 = bs*bs;
1287   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1288 
1289   bn  = row/bs;   /* Block number */
1290   bp  = row % bs; /* Block Position */
1291   M   = ai[bn+1] - ai[bn];
1292   *nz = bs*M;
1293 
1294   if (v) {
1295     *v = 0;
1296     if (*nz) {
1297       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1298       for (i=0; i<M; i++) { /* for each block in the block row */
1299         v_i  = *v + i*bs;
1300         aa_i = aa + bs2*(ai[bn] + i);
1301         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1302       }
1303     }
1304   }
1305 
1306   if (idx) {
1307     *idx = 0;
1308     if (*nz) {
1309       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1310       for (i=0; i<M; i++) { /* for each block in the block row */
1311         idx_i = *idx + i*bs;
1312         itmp  = bs*aj[ai[bn] + i];
1313         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1314       }
1315     }
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1321 {
1322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1331 {
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1336   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1341 {
1342   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1343   Mat            C;
1344   PetscErrorCode ierr;
1345   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1346   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1347   MatScalar      *ata,*aa=a->a;
1348 
1349   PetscFunctionBegin;
1350   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1351   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1352     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1353 
1354     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1355     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1356     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1357     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1358 
1359     at  = (Mat_SeqBAIJ*)C->data;
1360     ati = at->i;
1361     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1362   } else {
1363     C = *B;
1364     at = (Mat_SeqBAIJ*)C->data;
1365     ati = at->i;
1366   }
1367 
1368   atj = at->j;
1369   ata = at->a;
1370 
1371   /* Copy ati into atfill so we have locations of the next free space in atj */
1372   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1373 
1374   /* Walk through A row-wise and mark nonzero entries of A^T. */
1375   for (i=0; i<mbs; i++) {
1376     anzj = ai[i+1] - ai[i];
1377     for (j=0; j<anzj; j++) {
1378       atj[atfill[*aj]] = i;
1379       for (kr=0; kr<bs; kr++) {
1380         for (k=0; k<bs; k++) {
1381           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1382         }
1383       }
1384       atfill[*aj++] += 1;
1385     }
1386   }
1387   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1388   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389 
1390   /* Clean up temporary space and complete requests. */
1391   ierr = PetscFree(atfill);CHKERRQ(ierr);
1392 
1393   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1394     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1395     *B = C;
1396   } else {
1397     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1403 {
1404   PetscErrorCode ierr;
1405   Mat            Btrans;
1406 
1407   PetscFunctionBegin;
1408   *f   = PETSC_FALSE;
1409   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1410   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1411   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1416 {
1417   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1418   PetscErrorCode ierr;
1419   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1420   int            fd;
1421   PetscScalar    *aa;
1422   FILE           *file;
1423 
1424   PetscFunctionBegin;
1425   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1426   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1427   col_lens[0] = MAT_FILE_CLASSID;
1428 
1429   col_lens[1] = A->rmap->N;
1430   col_lens[2] = A->cmap->n;
1431   col_lens[3] = a->nz*bs2;
1432 
1433   /* store lengths of each row and write (including header) to file */
1434   count = 0;
1435   for (i=0; i<a->mbs; i++) {
1436     for (j=0; j<bs; j++) {
1437       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1438     }
1439   }
1440   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1441   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1442 
1443   /* store column indices (zero start index) */
1444   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1445   count = 0;
1446   for (i=0; i<a->mbs; i++) {
1447     for (j=0; j<bs; j++) {
1448       for (k=a->i[i]; k<a->i[i+1]; k++) {
1449         for (l=0; l<bs; l++) {
1450           jj[count++] = bs*a->j[k] + l;
1451         }
1452       }
1453     }
1454   }
1455   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1456   ierr = PetscFree(jj);CHKERRQ(ierr);
1457 
1458   /* store nonzero values */
1459   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1460   count = 0;
1461   for (i=0; i<a->mbs; i++) {
1462     for (j=0; j<bs; j++) {
1463       for (k=a->i[i]; k<a->i[i+1]; k++) {
1464         for (l=0; l<bs; l++) {
1465           aa[count++] = a->a[bs2*k + l*bs + j];
1466         }
1467       }
1468     }
1469   }
1470   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1471   ierr = PetscFree(aa);CHKERRQ(ierr);
1472 
1473   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1474   if (file) {
1475     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1476   }
1477   PetscFunctionReturn(0);
1478 }
1479 
1480 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1481 {
1482   PetscErrorCode ierr;
1483   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1484   PetscInt       i,bs = A->rmap->bs,k;
1485 
1486   PetscFunctionBegin;
1487   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1488   for (i=0; i<a->mbs; i++) {
1489     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1490     for (k=a->i[i]; k<a->i[i+1]; k++) {
1491       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1492     }
1493     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1494   }
1495   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1496   PetscFunctionReturn(0);
1497 }
1498 
1499 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1500 {
1501   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1502   PetscErrorCode    ierr;
1503   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1504   PetscViewerFormat format;
1505 
1506   PetscFunctionBegin;
1507   if (A->structure_only) {
1508     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1509     PetscFunctionReturn(0);
1510   }
1511 
1512   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1513   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1514     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1515   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1516     const char *matname;
1517     Mat        aij;
1518     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1519     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1520     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1521     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1522     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1523   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1524       PetscFunctionReturn(0);
1525   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1526     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1527     for (i=0; i<a->mbs; i++) {
1528       for (j=0; j<bs; j++) {
1529         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1530         for (k=a->i[i]; k<a->i[i+1]; k++) {
1531           for (l=0; l<bs; l++) {
1532 #if defined(PETSC_USE_COMPLEX)
1533             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1535                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1536             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1537               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1538                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1539             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1540               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1541             }
1542 #else
1543             if (a->a[bs2*k + l*bs + j] != 0.0) {
1544               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1545             }
1546 #endif
1547           }
1548         }
1549         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1550       }
1551     }
1552     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1553   } else {
1554     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1555     for (i=0; i<a->mbs; i++) {
1556       for (j=0; j<bs; j++) {
1557         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1558         for (k=a->i[i]; k<a->i[i+1]; k++) {
1559           for (l=0; l<bs; l++) {
1560 #if defined(PETSC_USE_COMPLEX)
1561             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1563                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1564             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1565               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1566                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1567             } else {
1568               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1569             }
1570 #else
1571             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1572 #endif
1573           }
1574         }
1575         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1576       }
1577     }
1578     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1579   }
1580   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #include <petscdraw.h>
1585 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1586 {
1587   Mat               A = (Mat) Aa;
1588   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1589   PetscErrorCode    ierr;
1590   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1591   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1592   MatScalar         *aa;
1593   PetscViewer       viewer;
1594   PetscViewerFormat format;
1595 
1596   PetscFunctionBegin;
1597   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1598   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1599   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1600 
1601   /* loop over matrix elements drawing boxes */
1602 
1603   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1604     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1605     /* Blue for negative, Cyan for zero and  Red for positive */
1606     color = PETSC_DRAW_BLUE;
1607     for (i=0,row=0; i<mbs; i++,row+=bs) {
1608       for (j=a->i[i]; j<a->i[i+1]; j++) {
1609         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1610         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1611         aa  = a->a + j*bs2;
1612         for (k=0; k<bs; k++) {
1613           for (l=0; l<bs; l++) {
1614             if (PetscRealPart(*aa++) >=  0.) continue;
1615             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1616           }
1617         }
1618       }
1619     }
1620     color = PETSC_DRAW_CYAN;
1621     for (i=0,row=0; i<mbs; i++,row+=bs) {
1622       for (j=a->i[i]; j<a->i[i+1]; j++) {
1623         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1624         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1625         aa  = a->a + j*bs2;
1626         for (k=0; k<bs; k++) {
1627           for (l=0; l<bs; l++) {
1628             if (PetscRealPart(*aa++) != 0.) continue;
1629             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1630           }
1631         }
1632       }
1633     }
1634     color = PETSC_DRAW_RED;
1635     for (i=0,row=0; i<mbs; i++,row+=bs) {
1636       for (j=a->i[i]; j<a->i[i+1]; j++) {
1637         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1638         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1639         aa  = a->a + j*bs2;
1640         for (k=0; k<bs; k++) {
1641           for (l=0; l<bs; l++) {
1642             if (PetscRealPart(*aa++) <= 0.) continue;
1643             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1644           }
1645         }
1646       }
1647     }
1648     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1649   } else {
1650     /* use contour shading to indicate magnitude of values */
1651     /* first determine max of all nonzero values */
1652     PetscReal minv = 0.0, maxv = 0.0;
1653     PetscDraw popup;
1654 
1655     for (i=0; i<a->nz*a->bs2; i++) {
1656       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1657     }
1658     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1659     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1660     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1661 
1662     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1663     for (i=0,row=0; i<mbs; i++,row+=bs) {
1664       for (j=a->i[i]; j<a->i[i+1]; j++) {
1665         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1666         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1667         aa  = a->a + j*bs2;
1668         for (k=0; k<bs; k++) {
1669           for (l=0; l<bs; l++) {
1670             MatScalar v = *aa++;
1671             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1672             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1673           }
1674         }
1675       }
1676     }
1677     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1678   }
1679   PetscFunctionReturn(0);
1680 }
1681 
1682 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1683 {
1684   PetscErrorCode ierr;
1685   PetscReal      xl,yl,xr,yr,w,h;
1686   PetscDraw      draw;
1687   PetscBool      isnull;
1688 
1689   PetscFunctionBegin;
1690   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1691   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1692   if (isnull) PetscFunctionReturn(0);
1693 
1694   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1695   xr  += w;          yr += h;        xl = -w;     yl = -h;
1696   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1697   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1698   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1699   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1700   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1701   PetscFunctionReturn(0);
1702 }
1703 
1704 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1705 {
1706   PetscErrorCode ierr;
1707   PetscBool      iascii,isbinary,isdraw;
1708 
1709   PetscFunctionBegin;
1710   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1713   if (iascii) {
1714     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1715   } else if (isbinary) {
1716     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1717   } else if (isdraw) {
1718     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1719   } else {
1720     Mat B;
1721     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1722     ierr = MatView(B,viewer);CHKERRQ(ierr);
1723     ierr = MatDestroy(&B);CHKERRQ(ierr);
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 
1728 
1729 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1730 {
1731   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1732   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1733   PetscInt    *ai = a->i,*ailen = a->ilen;
1734   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1735   MatScalar   *ap,*aa = a->a;
1736 
1737   PetscFunctionBegin;
1738   for (k=0; k<m; k++) { /* loop over rows */
1739     row = im[k]; brow = row/bs;
1740     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1741     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1742     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1743     nrow = ailen[brow];
1744     for (l=0; l<n; l++) { /* loop over columns */
1745       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1746       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1747       col  = in[l];
1748       bcol = col/bs;
1749       cidx = col%bs;
1750       ridx = row%bs;
1751       high = nrow;
1752       low  = 0; /* assume unsorted */
1753       while (high-low > 5) {
1754         t = (low+high)/2;
1755         if (rp[t] > bcol) high = t;
1756         else             low  = t;
1757       }
1758       for (i=low; i<high; i++) {
1759         if (rp[i] > bcol) break;
1760         if (rp[i] == bcol) {
1761           *v++ = ap[bs2*i+bs*cidx+ridx];
1762           goto finished;
1763         }
1764       }
1765       *v++ = 0.0;
1766 finished:;
1767     }
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1773 {
1774   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1775   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1776   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1777   PetscErrorCode    ierr;
1778   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1779   PetscBool         roworiented=a->roworiented;
1780   const PetscScalar *value     = v;
1781   MatScalar         *ap=NULL,*aa = a->a,*bap;
1782 
1783   PetscFunctionBegin;
1784   if (roworiented) {
1785     stepval = (n-1)*bs;
1786   } else {
1787     stepval = (m-1)*bs;
1788   }
1789   for (k=0; k<m; k++) { /* loop over added rows */
1790     row = im[k];
1791     if (row < 0) continue;
1792 #if defined(PETSC_USE_DEBUG)
1793     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1794 #endif
1795     rp   = aj + ai[row];
1796     if (!A->structure_only) ap = aa + bs2*ai[row];
1797     rmax = imax[row];
1798     nrow = ailen[row];
1799     low  = 0;
1800     high = nrow;
1801     for (l=0; l<n; l++) { /* loop over added columns */
1802       if (in[l] < 0) continue;
1803 #if defined(PETSC_USE_DEBUG)
1804       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1805 #endif
1806       col = in[l];
1807       if (!A->structure_only) {
1808         if (roworiented) {
1809           value = v + (k*(stepval+bs) + l)*bs;
1810         } else {
1811           value = v + (l*(stepval+bs) + k)*bs;
1812         }
1813       }
1814       if (col <= lastcol) low = 0;
1815       else high = nrow;
1816       lastcol = col;
1817       while (high-low > 7) {
1818         t = (low+high)/2;
1819         if (rp[t] > col) high = t;
1820         else             low  = t;
1821       }
1822       for (i=low; i<high; i++) {
1823         if (rp[i] > col) break;
1824         if (rp[i] == col) {
1825           if (A->structure_only) goto noinsert2;
1826           bap = ap +  bs2*i;
1827           if (roworiented) {
1828             if (is == ADD_VALUES) {
1829               for (ii=0; ii<bs; ii++,value+=stepval) {
1830                 for (jj=ii; jj<bs2; jj+=bs) {
1831                   bap[jj] += *value++;
1832                 }
1833               }
1834             } else {
1835               for (ii=0; ii<bs; ii++,value+=stepval) {
1836                 for (jj=ii; jj<bs2; jj+=bs) {
1837                   bap[jj] = *value++;
1838                 }
1839               }
1840             }
1841           } else {
1842             if (is == ADD_VALUES) {
1843               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1844                 for (jj=0; jj<bs; jj++) {
1845                   bap[jj] += value[jj];
1846                 }
1847                 bap += bs;
1848               }
1849             } else {
1850               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1851                 for (jj=0; jj<bs; jj++) {
1852                   bap[jj]  = value[jj];
1853                 }
1854                 bap += bs;
1855               }
1856             }
1857           }
1858           goto noinsert2;
1859         }
1860       }
1861       if (nonew == 1) goto noinsert2;
1862       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1863       if (A->structure_only) {
1864         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1865       } else {
1866         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1867       }
1868       N = nrow++ - 1; high++;
1869       /* shift up all the later entries in this row */
1870       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1871       rp[i] = col;
1872       if (!A->structure_only) {
1873         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1874         bap   = ap +  bs2*i;
1875         if (roworiented) {
1876           for (ii=0; ii<bs; ii++,value+=stepval) {
1877             for (jj=ii; jj<bs2; jj+=bs) {
1878               bap[jj] = *value++;
1879             }
1880           }
1881         } else {
1882           for (ii=0; ii<bs; ii++,value+=stepval) {
1883             for (jj=0; jj<bs; jj++) {
1884               *bap++ = *value++;
1885             }
1886           }
1887         }
1888       }
1889 noinsert2:;
1890       low = i;
1891     }
1892     ailen[row] = nrow;
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 
1897 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1898 {
1899   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1900   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1901   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1902   PetscErrorCode ierr;
1903   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1904   MatScalar      *aa  = a->a,*ap;
1905   PetscReal      ratio=0.6;
1906 
1907   PetscFunctionBegin;
1908   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1909 
1910   if (m) rmax = ailen[0];
1911   for (i=1; i<mbs; i++) {
1912     /* move each row back by the amount of empty slots (fshift) before it*/
1913     fshift += imax[i-1] - ailen[i-1];
1914     rmax    = PetscMax(rmax,ailen[i]);
1915     if (fshift) {
1916       ip = aj + ai[i];
1917       ap = aa + bs2*ai[i];
1918       N  = ailen[i];
1919       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1920       if (!A->structure_only) {
1921         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1922       }
1923     }
1924     ai[i] = ai[i-1] + ailen[i-1];
1925   }
1926   if (mbs) {
1927     fshift += imax[mbs-1] - ailen[mbs-1];
1928     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1929   }
1930 
1931   /* reset ilen and imax for each row */
1932   a->nonzerorowcnt = 0;
1933   if (A->structure_only) {
1934     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1935   } else { /* !A->structure_only */
1936     for (i=0; i<mbs; i++) {
1937       ailen[i] = imax[i] = ai[i+1] - ai[i];
1938       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1939     }
1940   }
1941   a->nz = ai[mbs];
1942 
1943   /* diagonals may have moved, so kill the diagonal pointers */
1944   a->idiagvalid = PETSC_FALSE;
1945   if (fshift && a->diag) {
1946     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1947     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1948     a->diag = 0;
1949   }
1950   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1951   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1952   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1953   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1954 
1955   A->info.mallocs    += a->reallocs;
1956   a->reallocs         = 0;
1957   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1958   a->rmax             = rmax;
1959 
1960   if (!A->structure_only) {
1961     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1962   }
1963   PetscFunctionReturn(0);
1964 }
1965 
1966 /*
1967    This function returns an array of flags which indicate the locations of contiguous
1968    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1969    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1970    Assume: sizes should be long enough to hold all the values.
1971 */
1972 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1973 {
1974   PetscInt  i,j,k,row;
1975   PetscBool flg;
1976 
1977   PetscFunctionBegin;
1978   for (i=0,j=0; i<n; j++) {
1979     row = idx[i];
1980     if (row%bs!=0) { /* Not the begining of a block */
1981       sizes[j] = 1;
1982       i++;
1983     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1984       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1985       i++;
1986     } else { /* Begining of the block, so check if the complete block exists */
1987       flg = PETSC_TRUE;
1988       for (k=1; k<bs; k++) {
1989         if (row+k != idx[i+k]) { /* break in the block */
1990           flg = PETSC_FALSE;
1991           break;
1992         }
1993       }
1994       if (flg) { /* No break in the bs */
1995         sizes[j] = bs;
1996         i       += bs;
1997       } else {
1998         sizes[j] = 1;
1999         i++;
2000       }
2001     }
2002   }
2003   *bs_max = j;
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2008 {
2009   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2010   PetscErrorCode    ierr;
2011   PetscInt          i,j,k,count,*rows;
2012   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2013   PetscScalar       zero = 0.0;
2014   MatScalar         *aa;
2015   const PetscScalar *xx;
2016   PetscScalar       *bb;
2017 
2018   PetscFunctionBegin;
2019   /* fix right hand side if needed */
2020   if (x && b) {
2021     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2022     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2023     for (i=0; i<is_n; i++) {
2024       bb[is_idx[i]] = diag*xx[is_idx[i]];
2025     }
2026     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2027     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2028   }
2029 
2030   /* Make a copy of the IS and  sort it */
2031   /* allocate memory for rows,sizes */
2032   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2033 
2034   /* copy IS values to rows, and sort them */
2035   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2036   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2037 
2038   if (baij->keepnonzeropattern) {
2039     for (i=0; i<is_n; i++) sizes[i] = 1;
2040     bs_max          = is_n;
2041   } else {
2042     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2043     A->nonzerostate++;
2044   }
2045 
2046   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2047     row = rows[j];
2048     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2049     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2050     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2051     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2052       if (diag != (PetscScalar)0.0) {
2053         if (baij->ilen[row/bs] > 0) {
2054           baij->ilen[row/bs]       = 1;
2055           baij->j[baij->i[row/bs]] = row/bs;
2056 
2057           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2058         }
2059         /* Now insert all the diagonal values for this bs */
2060         for (k=0; k<bs; k++) {
2061           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2062         }
2063       } else { /* (diag == 0.0) */
2064         baij->ilen[row/bs] = 0;
2065       } /* end (diag == 0.0) */
2066     } else { /* (sizes[i] != bs) */
2067 #if defined(PETSC_USE_DEBUG)
2068       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2069 #endif
2070       for (k=0; k<count; k++) {
2071         aa[0] =  zero;
2072         aa   += bs;
2073       }
2074       if (diag != (PetscScalar)0.0) {
2075         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2076       }
2077     }
2078   }
2079 
2080   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2081   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2086 {
2087   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2088   PetscErrorCode    ierr;
2089   PetscInt          i,j,k,count;
2090   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2091   PetscScalar       zero = 0.0;
2092   MatScalar         *aa;
2093   const PetscScalar *xx;
2094   PetscScalar       *bb;
2095   PetscBool         *zeroed,vecs = PETSC_FALSE;
2096 
2097   PetscFunctionBegin;
2098   /* fix right hand side if needed */
2099   if (x && b) {
2100     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2101     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2102     vecs = PETSC_TRUE;
2103   }
2104 
2105   /* zero the columns */
2106   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2107   for (i=0; i<is_n; i++) {
2108     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2109     zeroed[is_idx[i]] = PETSC_TRUE;
2110   }
2111   for (i=0; i<A->rmap->N; i++) {
2112     if (!zeroed[i]) {
2113       row = i/bs;
2114       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2115         for (k=0; k<bs; k++) {
2116           col = bs*baij->j[j] + k;
2117           if (zeroed[col]) {
2118             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2119             if (vecs) bb[i] -= aa[0]*xx[col];
2120             aa[0] = 0.0;
2121           }
2122         }
2123       }
2124     } else if (vecs) bb[i] = diag*xx[i];
2125   }
2126   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2127   if (vecs) {
2128     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2129     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2130   }
2131 
2132   /* zero the rows */
2133   for (i=0; i<is_n; i++) {
2134     row   = is_idx[i];
2135     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2136     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2137     for (k=0; k<count; k++) {
2138       aa[0] =  zero;
2139       aa   += bs;
2140     }
2141     if (diag != (PetscScalar)0.0) {
2142       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2143     }
2144   }
2145   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2146   PetscFunctionReturn(0);
2147 }
2148 
2149 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2150 {
2151   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2152   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2153   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2154   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2155   PetscErrorCode ierr;
2156   PetscInt       ridx,cidx,bs2=a->bs2;
2157   PetscBool      roworiented=a->roworiented;
2158   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2159 
2160   PetscFunctionBegin;
2161   for (k=0; k<m; k++) { /* loop over added rows */
2162     row  = im[k];
2163     brow = row/bs;
2164     if (row < 0) continue;
2165 #if defined(PETSC_USE_DEBUG)
2166     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2167 #endif
2168     rp   = aj + ai[brow];
2169     if (!A->structure_only) ap = aa + bs2*ai[brow];
2170     rmax = imax[brow];
2171     nrow = ailen[brow];
2172     low  = 0;
2173     high = nrow;
2174     for (l=0; l<n; l++) { /* loop over added columns */
2175       if (in[l] < 0) continue;
2176 #if defined(PETSC_USE_DEBUG)
2177       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2178 #endif
2179       col  = in[l]; bcol = col/bs;
2180       ridx = row % bs; cidx = col % bs;
2181       if (!A->structure_only) {
2182         if (roworiented) {
2183           value = v[l + k*n];
2184         } else {
2185           value = v[k + l*m];
2186         }
2187       }
2188       if (col <= lastcol) low = 0; else high = nrow;
2189       lastcol = col;
2190       while (high-low > 7) {
2191         t = (low+high)/2;
2192         if (rp[t] > bcol) high = t;
2193         else              low  = t;
2194       }
2195       for (i=low; i<high; i++) {
2196         if (rp[i] > bcol) break;
2197         if (rp[i] == bcol) {
2198           bap = ap +  bs2*i + bs*cidx + ridx;
2199           if (!A->structure_only) {
2200             if (is == ADD_VALUES) *bap += value;
2201             else                  *bap  = value;
2202           }
2203           goto noinsert1;
2204         }
2205       }
2206       if (nonew == 1) goto noinsert1;
2207       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2208       if (A->structure_only) {
2209         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2210       } else {
2211         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2212       }
2213       N = nrow++ - 1; high++;
2214       /* shift up all the later entries in this row */
2215       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2216       rp[i] = bcol;
2217       if (!A->structure_only) {
2218         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2219         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2220         ap[bs2*i + bs*cidx + ridx] = value;
2221       }
2222       a->nz++;
2223       A->nonzerostate++;
2224 noinsert1:;
2225       low = i;
2226     }
2227     ailen[brow] = nrow;
2228   }
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2233 {
2234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2235   Mat            outA;
2236   PetscErrorCode ierr;
2237   PetscBool      row_identity,col_identity;
2238 
2239   PetscFunctionBegin;
2240   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2241   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2242   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2243   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2244 
2245   outA            = inA;
2246   inA->factortype = MAT_FACTOR_LU;
2247   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2248   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2249 
2250   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2251 
2252   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2253   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2254   a->row = row;
2255   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2256   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2257   a->col = col;
2258 
2259   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2260   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2261   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2262   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2263 
2264   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2265   if (!a->solve_work) {
2266     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2267     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2268   }
2269   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2270   PetscFunctionReturn(0);
2271 }
2272 
2273 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2274 {
2275   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2276   PetscInt    i,nz,mbs;
2277 
2278   PetscFunctionBegin;
2279   nz  = baij->maxnz;
2280   mbs = baij->mbs;
2281   for (i=0; i<nz; i++) {
2282     baij->j[i] = indices[i];
2283   }
2284   baij->nz = nz;
2285   for (i=0; i<mbs; i++) {
2286     baij->ilen[i] = baij->imax[i];
2287   }
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 /*@
2292     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2293        in the matrix.
2294 
2295   Input Parameters:
2296 +  mat - the SeqBAIJ matrix
2297 -  indices - the column indices
2298 
2299   Level: advanced
2300 
2301   Notes:
2302     This can be called if you have precomputed the nonzero structure of the
2303   matrix and want to provide it to the matrix object to improve the performance
2304   of the MatSetValues() operation.
2305 
2306     You MUST have set the correct numbers of nonzeros per row in the call to
2307   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2308 
2309     MUST be called before any calls to MatSetValues();
2310 
2311 @*/
2312 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2313 {
2314   PetscErrorCode ierr;
2315 
2316   PetscFunctionBegin;
2317   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2318   PetscValidPointer(indices,2);
2319   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2320   PetscFunctionReturn(0);
2321 }
2322 
2323 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2324 {
2325   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2326   PetscErrorCode ierr;
2327   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2328   PetscReal      atmp;
2329   PetscScalar    *x,zero = 0.0;
2330   MatScalar      *aa;
2331   PetscInt       ncols,brow,krow,kcol;
2332 
2333   PetscFunctionBegin;
2334   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2335   bs  = A->rmap->bs;
2336   aa  = a->a;
2337   ai  = a->i;
2338   aj  = a->j;
2339   mbs = a->mbs;
2340 
2341   ierr = VecSet(v,zero);CHKERRQ(ierr);
2342   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2343   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2344   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2345   for (i=0; i<mbs; i++) {
2346     ncols = ai[1] - ai[0]; ai++;
2347     brow  = bs*i;
2348     for (j=0; j<ncols; j++) {
2349       for (kcol=0; kcol<bs; kcol++) {
2350         for (krow=0; krow<bs; krow++) {
2351           atmp = PetscAbsScalar(*aa);aa++;
2352           row  = brow + krow;   /* row index */
2353           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2354         }
2355       }
2356       aj++;
2357     }
2358   }
2359   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2364 {
2365   PetscErrorCode ierr;
2366 
2367   PetscFunctionBegin;
2368   /* If the two matrices have the same copy implementation, use fast copy. */
2369   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2370     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2371     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2372     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2373 
2374     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2375     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2376     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2377     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2378   } else {
2379     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2380   }
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2385 {
2386   PetscErrorCode ierr;
2387 
2388   PetscFunctionBegin;
2389   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2394 {
2395   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2396 
2397   PetscFunctionBegin;
2398   *array = a->a;
2399   PetscFunctionReturn(0);
2400 }
2401 
2402 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2403 {
2404   PetscFunctionBegin;
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2409 {
2410   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2411   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2412   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2413   PetscErrorCode ierr;
2414 
2415   PetscFunctionBegin;
2416   /* Set the number of nonzeros in the new matrix */
2417   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
2420 
2421 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2422 {
2423   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2424   PetscErrorCode ierr;
2425   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2426   PetscBLASInt   one=1;
2427 
2428   PetscFunctionBegin;
2429   if (str == SAME_NONZERO_PATTERN) {
2430     PetscScalar  alpha = a;
2431     PetscBLASInt bnz;
2432     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2433     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2434     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2435   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2436     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2437   } else {
2438     Mat      B;
2439     PetscInt *nnz;
2440     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2441     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2442     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2443     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2444     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2445     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2446     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2447     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2448     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2449     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2450     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2451     ierr = PetscFree(nnz);CHKERRQ(ierr);
2452   }
2453   PetscFunctionReturn(0);
2454 }
2455 
2456 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2457 {
2458   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2459   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2460   MatScalar   *aa = a->a;
2461 
2462   PetscFunctionBegin;
2463   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2464   PetscFunctionReturn(0);
2465 }
2466 
2467 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2468 {
2469   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2470   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2471   MatScalar   *aa = a->a;
2472 
2473   PetscFunctionBegin;
2474   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 /*
2479     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2480 */
2481 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2482 {
2483   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2484   PetscErrorCode ierr;
2485   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2486   PetscInt       nz = a->i[m],row,*jj,mr,col;
2487 
2488   PetscFunctionBegin;
2489   *nn = n;
2490   if (!ia) PetscFunctionReturn(0);
2491   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2492   else {
2493     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2494     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2495     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2496     jj   = a->j;
2497     for (i=0; i<nz; i++) {
2498       collengths[jj[i]]++;
2499     }
2500     cia[0] = oshift;
2501     for (i=0; i<n; i++) {
2502       cia[i+1] = cia[i] + collengths[i];
2503     }
2504     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2505     jj   = a->j;
2506     for (row=0; row<m; row++) {
2507       mr = a->i[row+1] - a->i[row];
2508       for (i=0; i<mr; i++) {
2509         col = *jj++;
2510 
2511         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2512       }
2513     }
2514     ierr = PetscFree(collengths);CHKERRQ(ierr);
2515     *ia  = cia; *ja = cja;
2516   }
2517   PetscFunctionReturn(0);
2518 }
2519 
2520 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2521 {
2522   PetscErrorCode ierr;
2523 
2524   PetscFunctionBegin;
2525   if (!ia) PetscFunctionReturn(0);
2526   ierr = PetscFree(*ia);CHKERRQ(ierr);
2527   ierr = PetscFree(*ja);CHKERRQ(ierr);
2528   PetscFunctionReturn(0);
2529 }
2530 
2531 /*
2532  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2533  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2534  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2535  */
2536 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2537 {
2538   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2539   PetscErrorCode ierr;
2540   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2541   PetscInt       nz = a->i[m],row,*jj,mr,col;
2542   PetscInt       *cspidx;
2543 
2544   PetscFunctionBegin;
2545   *nn = n;
2546   if (!ia) PetscFunctionReturn(0);
2547 
2548   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2550   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2551   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2552   jj   = a->j;
2553   for (i=0; i<nz; i++) {
2554     collengths[jj[i]]++;
2555   }
2556   cia[0] = oshift;
2557   for (i=0; i<n; i++) {
2558     cia[i+1] = cia[i] + collengths[i];
2559   }
2560   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2561   jj   = a->j;
2562   for (row=0; row<m; row++) {
2563     mr = a->i[row+1] - a->i[row];
2564     for (i=0; i<mr; i++) {
2565       col = *jj++;
2566       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2567       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2568     }
2569   }
2570   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2571   *ia    = cia;
2572   *ja    = cja;
2573   *spidx = cspidx;
2574   PetscFunctionReturn(0);
2575 }
2576 
2577 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2578 {
2579   PetscErrorCode ierr;
2580 
2581   PetscFunctionBegin;
2582   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2583   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2584   PetscFunctionReturn(0);
2585 }
2586 
2587 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2588 {
2589   PetscErrorCode ierr;
2590   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2591 
2592   PetscFunctionBegin;
2593   if (!Y->preallocated || !aij->nz) {
2594     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2595   }
2596   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 /* -------------------------------------------------------------------*/
2601 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2602                                        MatGetRow_SeqBAIJ,
2603                                        MatRestoreRow_SeqBAIJ,
2604                                        MatMult_SeqBAIJ_N,
2605                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2606                                        MatMultTranspose_SeqBAIJ,
2607                                        MatMultTransposeAdd_SeqBAIJ,
2608                                        0,
2609                                        0,
2610                                        0,
2611                                /* 10*/ 0,
2612                                        MatLUFactor_SeqBAIJ,
2613                                        0,
2614                                        0,
2615                                        MatTranspose_SeqBAIJ,
2616                                /* 15*/ MatGetInfo_SeqBAIJ,
2617                                        MatEqual_SeqBAIJ,
2618                                        MatGetDiagonal_SeqBAIJ,
2619                                        MatDiagonalScale_SeqBAIJ,
2620                                        MatNorm_SeqBAIJ,
2621                                /* 20*/ 0,
2622                                        MatAssemblyEnd_SeqBAIJ,
2623                                        MatSetOption_SeqBAIJ,
2624                                        MatZeroEntries_SeqBAIJ,
2625                                /* 24*/ MatZeroRows_SeqBAIJ,
2626                                        0,
2627                                        0,
2628                                        0,
2629                                        0,
2630                                /* 29*/ MatSetUp_SeqBAIJ,
2631                                        0,
2632                                        0,
2633                                        0,
2634                                        0,
2635                                /* 34*/ MatDuplicate_SeqBAIJ,
2636                                        0,
2637                                        0,
2638                                        MatILUFactor_SeqBAIJ,
2639                                        0,
2640                                /* 39*/ MatAXPY_SeqBAIJ,
2641                                        MatCreateSubMatrices_SeqBAIJ,
2642                                        MatIncreaseOverlap_SeqBAIJ,
2643                                        MatGetValues_SeqBAIJ,
2644                                        MatCopy_SeqBAIJ,
2645                                /* 44*/ 0,
2646                                        MatScale_SeqBAIJ,
2647                                        MatShift_SeqBAIJ,
2648                                        0,
2649                                        MatZeroRowsColumns_SeqBAIJ,
2650                                /* 49*/ 0,
2651                                        MatGetRowIJ_SeqBAIJ,
2652                                        MatRestoreRowIJ_SeqBAIJ,
2653                                        MatGetColumnIJ_SeqBAIJ,
2654                                        MatRestoreColumnIJ_SeqBAIJ,
2655                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2656                                        0,
2657                                        0,
2658                                        0,
2659                                        MatSetValuesBlocked_SeqBAIJ,
2660                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2661                                        MatDestroy_SeqBAIJ,
2662                                        MatView_SeqBAIJ,
2663                                        0,
2664                                        0,
2665                                /* 64*/ 0,
2666                                        0,
2667                                        0,
2668                                        0,
2669                                        0,
2670                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2671                                        0,
2672                                        MatConvert_Basic,
2673                                        0,
2674                                        0,
2675                                /* 74*/ 0,
2676                                        MatFDColoringApply_BAIJ,
2677                                        0,
2678                                        0,
2679                                        0,
2680                                /* 79*/ 0,
2681                                        0,
2682                                        0,
2683                                        0,
2684                                        MatLoad_SeqBAIJ,
2685                                /* 84*/ 0,
2686                                        0,
2687                                        0,
2688                                        0,
2689                                        0,
2690                                /* 89*/ 0,
2691                                        0,
2692                                        0,
2693                                        0,
2694                                        0,
2695                                /* 94*/ 0,
2696                                        0,
2697                                        0,
2698                                        0,
2699                                        0,
2700                                /* 99*/ 0,
2701                                        0,
2702                                        0,
2703                                        0,
2704                                        0,
2705                                /*104*/ 0,
2706                                        MatRealPart_SeqBAIJ,
2707                                        MatImaginaryPart_SeqBAIJ,
2708                                        0,
2709                                        0,
2710                                /*109*/ 0,
2711                                        0,
2712                                        0,
2713                                        0,
2714                                        MatMissingDiagonal_SeqBAIJ,
2715                                /*114*/ 0,
2716                                        0,
2717                                        0,
2718                                        0,
2719                                        0,
2720                                /*119*/ 0,
2721                                        0,
2722                                        MatMultHermitianTranspose_SeqBAIJ,
2723                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2724                                        0,
2725                                /*124*/ 0,
2726                                        0,
2727                                        MatInvertBlockDiagonal_SeqBAIJ,
2728                                        0,
2729                                        0,
2730                                /*129*/ 0,
2731                                        0,
2732                                        0,
2733                                        0,
2734                                        0,
2735                                /*134*/ 0,
2736                                        0,
2737                                        0,
2738                                        0,
2739                                        0,
2740                                /*139*/ MatSetBlockSizes_Default,
2741                                        0,
2742                                        0,
2743                                        MatFDColoringSetUp_SeqXAIJ,
2744                                        0,
2745                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2746                                        MatDestroySubMatrices_SeqBAIJ
2747 };
2748 
2749 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2750 {
2751   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2752   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2753   PetscErrorCode ierr;
2754 
2755   PetscFunctionBegin;
2756   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2757 
2758   /* allocate space for values if not already there */
2759   if (!aij->saved_values) {
2760     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2761     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2762   }
2763 
2764   /* copy values over */
2765   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2770 {
2771   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2772   PetscErrorCode ierr;
2773   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2774 
2775   PetscFunctionBegin;
2776   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2777   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2778 
2779   /* copy values over */
2780   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2781   PetscFunctionReturn(0);
2782 }
2783 
2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2785 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2786 
2787 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2788 {
2789   Mat_SeqBAIJ    *b;
2790   PetscErrorCode ierr;
2791   PetscInt       i,mbs,nbs,bs2;
2792   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2793 
2794   PetscFunctionBegin;
2795   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2796   if (nz == MAT_SKIP_ALLOCATION) {
2797     skipallocation = PETSC_TRUE;
2798     nz             = 0;
2799   }
2800 
2801   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2802   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2804   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2805 
2806   B->preallocated = PETSC_TRUE;
2807 
2808   mbs = B->rmap->n/bs;
2809   nbs = B->cmap->n/bs;
2810   bs2 = bs*bs;
2811 
2812   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2813 
2814   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2815   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2816   if (nnz) {
2817     for (i=0; i<mbs; i++) {
2818       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2819       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2820     }
2821   }
2822 
2823   b    = (Mat_SeqBAIJ*)B->data;
2824   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2825   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2826   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2827 
2828   if (!flg) {
2829     switch (bs) {
2830     case 1:
2831       B->ops->mult    = MatMult_SeqBAIJ_1;
2832       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2833       break;
2834     case 2:
2835       B->ops->mult    = MatMult_SeqBAIJ_2;
2836       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2837       break;
2838     case 3:
2839       B->ops->mult    = MatMult_SeqBAIJ_3;
2840       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2841       break;
2842     case 4:
2843       B->ops->mult    = MatMult_SeqBAIJ_4;
2844       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2845       break;
2846     case 5:
2847       B->ops->mult    = MatMult_SeqBAIJ_5;
2848       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2849       break;
2850     case 6:
2851       B->ops->mult    = MatMult_SeqBAIJ_6;
2852       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2853       break;
2854     case 7:
2855       B->ops->mult    = MatMult_SeqBAIJ_7;
2856       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2857       break;
2858     case 9:
2859 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2860       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2862 #else
2863       B->ops->mult    = MatMult_SeqBAIJ_N;
2864       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2865 #endif
2866       break;
2867     case 11:
2868       B->ops->mult    = MatMult_SeqBAIJ_11;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2870       break;
2871     case 15:
2872       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2873       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2874       break;
2875     default:
2876       B->ops->mult    = MatMult_SeqBAIJ_N;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878       break;
2879     }
2880   }
2881   B->ops->sor = MatSOR_SeqBAIJ;
2882   b->mbs = mbs;
2883   b->nbs = nbs;
2884   if (!skipallocation) {
2885     if (!b->imax) {
2886       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2887       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2888 
2889       b->free_imax_ilen = PETSC_TRUE;
2890     }
2891     /* b->ilen will count nonzeros in each block row so far. */
2892     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2893     if (!nnz) {
2894       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2895       else if (nz < 0) nz = 1;
2896       nz = PetscMin(nz,nbs);
2897       for (i=0; i<mbs; i++) b->imax[i] = nz;
2898       nz = nz*mbs;
2899     } else {
2900       nz = 0;
2901       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2902     }
2903 
2904     /* allocate the matrix space */
2905     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2906     if (B->structure_only) {
2907       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2908       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2909       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2910     } else {
2911       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2912       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2913       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2914     }
2915     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2916 
2917     if (B->structure_only) {
2918       b->singlemalloc = PETSC_FALSE;
2919       b->free_a       = PETSC_FALSE;
2920     } else {
2921       b->singlemalloc = PETSC_TRUE;
2922       b->free_a       = PETSC_TRUE;
2923     }
2924     b->free_ij = PETSC_TRUE;
2925 
2926     b->i[0] = 0;
2927     for (i=1; i<mbs+1; i++) {
2928       b->i[i] = b->i[i-1] + b->imax[i-1];
2929     }
2930 
2931   } else {
2932     b->free_a  = PETSC_FALSE;
2933     b->free_ij = PETSC_FALSE;
2934   }
2935 
2936   b->bs2              = bs2;
2937   b->mbs              = mbs;
2938   b->nz               = 0;
2939   b->maxnz            = nz;
2940   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2941   B->was_assembled    = PETSC_FALSE;
2942   B->assembled        = PETSC_FALSE;
2943   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2944   PetscFunctionReturn(0);
2945 }
2946 
2947 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2948 {
2949   PetscInt       i,m,nz,nz_max=0,*nnz;
2950   PetscScalar    *values=0;
2951   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2952   PetscErrorCode ierr;
2953 
2954   PetscFunctionBegin;
2955   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2956   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2957   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2958   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2959   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2960   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2961   m    = B->rmap->n/bs;
2962 
2963   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2964   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2965   for (i=0; i<m; i++) {
2966     nz = ii[i+1]- ii[i];
2967     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2968     nz_max = PetscMax(nz_max, nz);
2969     nnz[i] = nz;
2970   }
2971   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2972   ierr = PetscFree(nnz);CHKERRQ(ierr);
2973 
2974   values = (PetscScalar*)V;
2975   if (!values) {
2976     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2977   }
2978   for (i=0; i<m; i++) {
2979     PetscInt          ncols  = ii[i+1] - ii[i];
2980     const PetscInt    *icols = jj + ii[i];
2981     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2982     if (!roworiented) {
2983       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2984     } else {
2985       PetscInt j;
2986       for (j=0; j<ncols; j++) {
2987         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
2988         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
2989       }
2990     }
2991   }
2992   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
2993   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2994   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2995   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
2996   PetscFunctionReturn(0);
2997 }
2998 
2999 /*MC
3000    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3001    block sparse compressed row format.
3002 
3003    Options Database Keys:
3004 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3005 
3006   Level: beginner
3007 
3008 .seealso: MatCreateSeqBAIJ()
3009 M*/
3010 
3011 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3012 
3013 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3014 {
3015   PetscErrorCode ierr;
3016   PetscMPIInt    size;
3017   Mat_SeqBAIJ    *b;
3018 
3019   PetscFunctionBegin;
3020   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3021   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3022 
3023   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3024   B->data = (void*)b;
3025   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3026 
3027   b->row          = 0;
3028   b->col          = 0;
3029   b->icol         = 0;
3030   b->reallocs     = 0;
3031   b->saved_values = 0;
3032 
3033   b->roworiented        = PETSC_TRUE;
3034   b->nonew              = 0;
3035   b->diag               = 0;
3036   B->spptr              = 0;
3037   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3038   b->keepnonzeropattern = PETSC_FALSE;
3039 
3040   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3041   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3042   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3043   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3044   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3045   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3046   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3047   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3048   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3049 #if defined(PETSC_HAVE_HYPRE)
3050   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3051 #endif
3052   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3055   PetscFunctionReturn(0);
3056 }
3057 
3058 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3059 {
3060   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3061   PetscErrorCode ierr;
3062   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3063 
3064   PetscFunctionBegin;
3065   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3066 
3067   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3068     c->imax           = a->imax;
3069     c->ilen           = a->ilen;
3070     c->free_imax_ilen = PETSC_FALSE;
3071   } else {
3072     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3073     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3074     for (i=0; i<mbs; i++) {
3075       c->imax[i] = a->imax[i];
3076       c->ilen[i] = a->ilen[i];
3077     }
3078     c->free_imax_ilen = PETSC_TRUE;
3079   }
3080 
3081   /* allocate the matrix space */
3082   if (mallocmatspace) {
3083     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3084       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3085       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3086 
3087       c->i            = a->i;
3088       c->j            = a->j;
3089       c->singlemalloc = PETSC_FALSE;
3090       c->free_a       = PETSC_TRUE;
3091       c->free_ij      = PETSC_FALSE;
3092       c->parent       = A;
3093       C->preallocated = PETSC_TRUE;
3094       C->assembled    = PETSC_TRUE;
3095 
3096       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3097       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3098       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3099     } else {
3100       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3101       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3102 
3103       c->singlemalloc = PETSC_TRUE;
3104       c->free_a       = PETSC_TRUE;
3105       c->free_ij      = PETSC_TRUE;
3106 
3107       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3108       if (mbs > 0) {
3109         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3110         if (cpvalues == MAT_COPY_VALUES) {
3111           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3112         } else {
3113           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3114         }
3115       }
3116       C->preallocated = PETSC_TRUE;
3117       C->assembled    = PETSC_TRUE;
3118     }
3119   }
3120 
3121   c->roworiented = a->roworiented;
3122   c->nonew       = a->nonew;
3123 
3124   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3125   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3126 
3127   c->bs2         = a->bs2;
3128   c->mbs         = a->mbs;
3129   c->nbs         = a->nbs;
3130 
3131   if (a->diag) {
3132     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3133       c->diag      = a->diag;
3134       c->free_diag = PETSC_FALSE;
3135     } else {
3136       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3137       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3138       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3139       c->free_diag = PETSC_TRUE;
3140     }
3141   } else c->diag = 0;
3142 
3143   c->nz         = a->nz;
3144   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3145   c->solve_work = NULL;
3146   c->mult_work  = NULL;
3147   c->sor_workt  = NULL;
3148   c->sor_work   = NULL;
3149 
3150   c->compressedrow.use   = a->compressedrow.use;
3151   c->compressedrow.nrows = a->compressedrow.nrows;
3152   if (a->compressedrow.use) {
3153     i    = a->compressedrow.nrows;
3154     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3155     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3156     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3157     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3158   } else {
3159     c->compressedrow.use    = PETSC_FALSE;
3160     c->compressedrow.i      = NULL;
3161     c->compressedrow.rindex = NULL;
3162   }
3163   C->nonzerostate = A->nonzerostate;
3164 
3165   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3166   PetscFunctionReturn(0);
3167 }
3168 
3169 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3170 {
3171   PetscErrorCode ierr;
3172 
3173   PetscFunctionBegin;
3174   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3175   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3176   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3177   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3182 {
3183   Mat_SeqBAIJ    *a;
3184   PetscErrorCode ierr;
3185   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3186   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3187   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3188   PetscInt       *masked,nmask,tmp,bs2,ishift;
3189   PetscMPIInt    size;
3190   int            fd;
3191   PetscScalar    *aa;
3192   MPI_Comm       comm;
3193   PetscBool      isbinary;
3194 
3195   PetscFunctionBegin;
3196   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3197   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3198 
3199   /* force binary viewer to load .info file if it has not yet done so */
3200   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3201   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3202   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3203   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3204   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3205   if (bs < 0) bs = 1;
3206   bs2  = bs*bs;
3207 
3208   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3209   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3210   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3211   ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3212   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3213   M = header[1]; N = header[2]; nz = header[3];
3214 
3215   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3216   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3217 
3218   /*
3219      This code adds extra rows to make sure the number of rows is
3220     divisible by the blocksize
3221   */
3222   mbs        = M/bs;
3223   extra_rows = bs - M + bs*(mbs);
3224   if (extra_rows == bs) extra_rows = 0;
3225   else mbs++;
3226   if (extra_rows) {
3227     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3228   }
3229 
3230   /* Set global sizes if not already set */
3231   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3232     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3233   } else { /* Check if the matrix global sizes are correct */
3234     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3235     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3236       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3237     }
3238     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3239   }
3240 
3241   /* read in row lengths */
3242   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3243   ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
3244   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3245 
3246   /* read in column indices */
3247   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3248   ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3249   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3250 
3251   /* loop over row lengths determining block row lengths */
3252   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3253   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3254   ierr     = PetscArrayzero(mask,mbs);CHKERRQ(ierr);
3255   rowcount = 0;
3256   nzcount  = 0;
3257   for (i=0; i<mbs; i++) {
3258     nmask = 0;
3259     for (j=0; j<bs; j++) {
3260       kmax = rowlengths[rowcount];
3261       for (k=0; k<kmax; k++) {
3262         tmp = jj[nzcount++]/bs;
3263         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3264       }
3265       rowcount++;
3266     }
3267     browlengths[i] += nmask;
3268     /* zero out the mask elements we set */
3269     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3270   }
3271 
3272   /* Do preallocation  */
3273   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3274   a    = (Mat_SeqBAIJ*)newmat->data;
3275 
3276   /* set matrix "i" values */
3277   a->i[0] = 0;
3278   for (i=1; i<= mbs; i++) {
3279     a->i[i]      = a->i[i-1] + browlengths[i-1];
3280     a->ilen[i-1] = browlengths[i-1];
3281   }
3282   a->nz = 0;
3283   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3284 
3285   /* read in nonzero values */
3286   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3287   ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3288   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3289 
3290   /* set "a" and "j" values into matrix */
3291   nzcount = 0; jcount = 0;
3292   for (i=0; i<mbs; i++) {
3293     nzcountb = nzcount;
3294     nmask    = 0;
3295     for (j=0; j<bs; j++) {
3296       kmax = rowlengths[i*bs+j];
3297       for (k=0; k<kmax; k++) {
3298         tmp = jj[nzcount++]/bs;
3299         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3300       }
3301     }
3302     /* sort the masked values */
3303     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3304 
3305     /* set "j" values into matrix */
3306     maskcount = 1;
3307     for (j=0; j<nmask; j++) {
3308       a->j[jcount++]  = masked[j];
3309       mask[masked[j]] = maskcount++;
3310     }
3311     /* set "a" values into matrix */
3312     ishift = bs2*a->i[i];
3313     for (j=0; j<bs; j++) {
3314       kmax = rowlengths[i*bs+j];
3315       for (k=0; k<kmax; k++) {
3316         tmp       = jj[nzcountb]/bs;
3317         block     = mask[tmp] - 1;
3318         point     = jj[nzcountb] - bs*tmp;
3319         idx       = ishift + bs2*block + j + bs*point;
3320         a->a[idx] = (MatScalar)aa[nzcountb++];
3321       }
3322     }
3323     /* zero out the mask elements we set */
3324     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3325   }
3326   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3327 
3328   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3329   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3330   ierr = PetscFree(aa);CHKERRQ(ierr);
3331   ierr = PetscFree(jj);CHKERRQ(ierr);
3332   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3333 
3334   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3335   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3336   PetscFunctionReturn(0);
3337 }
3338 
3339 /*@C
3340    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3341    compressed row) format.  For good matrix assembly performance the
3342    user should preallocate the matrix storage by setting the parameter nz
3343    (or the array nnz).  By setting these parameters accurately, performance
3344    during matrix assembly can be increased by more than a factor of 50.
3345 
3346    Collective
3347 
3348    Input Parameters:
3349 +  comm - MPI communicator, set to PETSC_COMM_SELF
3350 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3351           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3352 .  m - number of rows
3353 .  n - number of columns
3354 .  nz - number of nonzero blocks  per block row (same for all rows)
3355 -  nnz - array containing the number of nonzero blocks in the various block rows
3356          (possibly different for each block row) or NULL
3357 
3358    Output Parameter:
3359 .  A - the matrix
3360 
3361    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3362    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3363    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3364 
3365    Options Database Keys:
3366 .   -mat_no_unroll - uses code that does not unroll the loops in the
3367                      block calculations (much slower)
3368 .    -mat_block_size - size of the blocks to use
3369 
3370    Level: intermediate
3371 
3372    Notes:
3373    The number of rows and columns must be divisible by blocksize.
3374 
3375    If the nnz parameter is given then the nz parameter is ignored
3376 
3377    A nonzero block is any block that as 1 or more nonzeros in it
3378 
3379    The block AIJ format is fully compatible with standard Fortran 77
3380    storage.  That is, the stored row and column indices can begin at
3381    either one (as in Fortran) or zero.  See the users' manual for details.
3382 
3383    Specify the preallocated storage with either nz or nnz (not both).
3384    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3385    allocation.  See Users-Manual: ch_mat for details.
3386    matrices.
3387 
3388 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3389 @*/
3390 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3391 {
3392   PetscErrorCode ierr;
3393 
3394   PetscFunctionBegin;
3395   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3396   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3397   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3398   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 /*@C
3403    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3404    per row in the matrix. For good matrix assembly performance the
3405    user should preallocate the matrix storage by setting the parameter nz
3406    (or the array nnz).  By setting these parameters accurately, performance
3407    during matrix assembly can be increased by more than a factor of 50.
3408 
3409    Collective
3410 
3411    Input Parameters:
3412 +  B - the matrix
3413 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3414           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3415 .  nz - number of block nonzeros per block row (same for all rows)
3416 -  nnz - array containing the number of block nonzeros in the various block rows
3417          (possibly different for each block row) or NULL
3418 
3419    Options Database Keys:
3420 .   -mat_no_unroll - uses code that does not unroll the loops in the
3421                      block calculations (much slower)
3422 .   -mat_block_size - size of the blocks to use
3423 
3424    Level: intermediate
3425 
3426    Notes:
3427    If the nnz parameter is given then the nz parameter is ignored
3428 
3429    You can call MatGetInfo() to get information on how effective the preallocation was;
3430    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3431    You can also run with the option -info and look for messages with the string
3432    malloc in them to see if additional memory allocation was needed.
3433 
3434    The block AIJ format is fully compatible with standard Fortran 77
3435    storage.  That is, the stored row and column indices can begin at
3436    either one (as in Fortran) or zero.  See the users' manual for details.
3437 
3438    Specify the preallocated storage with either nz or nnz (not both).
3439    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3440    allocation.  See Users-Manual: ch_mat for details.
3441 
3442 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3443 @*/
3444 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3445 {
3446   PetscErrorCode ierr;
3447 
3448   PetscFunctionBegin;
3449   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3450   PetscValidType(B,1);
3451   PetscValidLogicalCollectiveInt(B,bs,2);
3452   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3453   PetscFunctionReturn(0);
3454 }
3455 
3456 /*@C
3457    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3458    (the default sequential PETSc format).
3459 
3460    Collective
3461 
3462    Input Parameters:
3463 +  B - the matrix
3464 .  i - the indices into j for the start of each local row (starts with zero)
3465 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3466 -  v - optional values in the matrix
3467 
3468    Level: developer
3469 
3470    Notes:
3471    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3472    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3473    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3474    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3475    block column and the second index is over columns within a block.
3476 
3477 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3478 @*/
3479 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3480 {
3481   PetscErrorCode ierr;
3482 
3483   PetscFunctionBegin;
3484   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3485   PetscValidType(B,1);
3486   PetscValidLogicalCollectiveInt(B,bs,2);
3487   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3488   PetscFunctionReturn(0);
3489 }
3490 
3491 
3492 /*@
3493      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3494 
3495      Collective
3496 
3497    Input Parameters:
3498 +  comm - must be an MPI communicator of size 1
3499 .  bs - size of block
3500 .  m - number of rows
3501 .  n - number of columns
3502 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3503 .  j - column indices
3504 -  a - matrix values
3505 
3506    Output Parameter:
3507 .  mat - the matrix
3508 
3509    Level: advanced
3510 
3511    Notes:
3512        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3513     once the matrix is destroyed
3514 
3515        You cannot set new nonzero locations into this matrix, that will generate an error.
3516 
3517        The i and j indices are 0 based
3518 
3519        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3520 
3521       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3522       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3523       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3524       with column-major ordering within blocks.
3525 
3526 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3527 
3528 @*/
3529 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3530 {
3531   PetscErrorCode ierr;
3532   PetscInt       ii;
3533   Mat_SeqBAIJ    *baij;
3534 
3535   PetscFunctionBegin;
3536   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3537   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3538 
3539   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3540   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3541   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3542   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3543   baij = (Mat_SeqBAIJ*)(*mat)->data;
3544   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3545   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3546 
3547   baij->i = i;
3548   baij->j = j;
3549   baij->a = a;
3550 
3551   baij->singlemalloc = PETSC_FALSE;
3552   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3553   baij->free_a       = PETSC_FALSE;
3554   baij->free_ij      = PETSC_FALSE;
3555 
3556   for (ii=0; ii<m; ii++) {
3557     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3558 #if defined(PETSC_USE_DEBUG)
3559     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3560 #endif
3561   }
3562 #if defined(PETSC_USE_DEBUG)
3563   for (ii=0; ii<baij->i[m]; ii++) {
3564     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3565     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3566   }
3567 #endif
3568 
3569   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3570   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3571   PetscFunctionReturn(0);
3572 }
3573 
3574 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3575 {
3576   PetscErrorCode ierr;
3577   PetscMPIInt    size;
3578 
3579   PetscFunctionBegin;
3580   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3581   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3582     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3583   } else {
3584     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3585   }
3586   PetscFunctionReturn(0);
3587 }
3588