xref: /petsc/src/mat/impls/baij/seq/baij.c (revision fc8a9adeb7fcdc98711d755fa2dc544ddccf0f3e)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 PETSC_INTERN PetscErrorCode MatPtAP_IS_XAIJ(Mat,Mat,MatReuse,PetscReal,Mat*);
20 
21 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
22 {
23   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
24   PetscErrorCode ierr;
25   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
26   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
27   PetscReal      shift = 0.0;
28   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
29 
30   PetscFunctionBegin;
31   allowzeropivot = PetscNot(A->erroriffailure);
32 
33   if (a->idiagvalid) {
34     if (values) *values = a->idiag;
35     PetscFunctionReturn(0);
36   }
37   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
38   diag_offset = a->diag;
39   if (!a->idiag) {
40     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
41     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
42   }
43   diag  = a->idiag;
44   if (values) *values = a->idiag;
45   /* factor and invert each block */
46   switch (bs) {
47   case 1:
48     for (i=0; i<mbs; i++) {
49       odiag    = v + 1*diag_offset[i];
50       diag[0]  = odiag[0];
51 
52       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
53         if (allowzeropivot) {
54           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
55           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
56           A->factorerror_zeropivot_row   = i;
57           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
58         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
59       }
60 
61       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
62       diag    += 1;
63     }
64     break;
65   case 2:
66     for (i=0; i<mbs; i++) {
67       odiag    = v + 4*diag_offset[i];
68       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
69       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
70       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
71       diag    += 4;
72     }
73     break;
74   case 3:
75     for (i=0; i<mbs; i++) {
76       odiag    = v + 9*diag_offset[i];
77       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
78       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
79       diag[8]  = odiag[8];
80       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
81       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
82       diag    += 9;
83     }
84     break;
85   case 4:
86     for (i=0; i<mbs; i++) {
87       odiag  = v + 16*diag_offset[i];
88       ierr   = PetscMemcpy(diag,odiag,16*sizeof(PetscScalar));CHKERRQ(ierr);
89       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
90       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
91       diag  += 16;
92     }
93     break;
94   case 5:
95     for (i=0; i<mbs; i++) {
96       odiag  = v + 25*diag_offset[i];
97       ierr   = PetscMemcpy(diag,odiag,25*sizeof(PetscScalar));CHKERRQ(ierr);
98       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
99       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
100       diag  += 25;
101     }
102     break;
103   case 6:
104     for (i=0; i<mbs; i++) {
105       odiag  = v + 36*diag_offset[i];
106       ierr   = PetscMemcpy(diag,odiag,36*sizeof(PetscScalar));CHKERRQ(ierr);
107       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
108       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
109       diag  += 36;
110     }
111     break;
112   case 7:
113     for (i=0; i<mbs; i++) {
114       odiag  = v + 49*diag_offset[i];
115       ierr   = PetscMemcpy(diag,odiag,49*sizeof(PetscScalar));CHKERRQ(ierr);
116       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
117       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
118       diag  += 49;
119     }
120     break;
121   default:
122     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
123     for (i=0; i<mbs; i++) {
124       odiag  = v + bs2*diag_offset[i];
125       ierr   = PetscMemcpy(diag,odiag,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
126       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
127       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
128       diag  += bs2;
129     }
130     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
131   }
132   a->idiagvalid = PETSC_TRUE;
133   PetscFunctionReturn(0);
134 }
135 
136 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
137 {
138   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
139   PetscScalar       *x,*work,*w,*workt,*t;
140   const MatScalar   *v,*aa = a->a, *idiag;
141   const PetscScalar *b,*xb;
142   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
143   PetscErrorCode    ierr;
144   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
145   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
146 
147   PetscFunctionBegin;
148   its = its*lits;
149   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
150   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
151   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
152   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for non-trivial relaxation factor");
153   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
154 
155   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
156 
157   if (!m) PetscFunctionReturn(0);
158   diag  = a->diag;
159   idiag = a->idiag;
160   k    = PetscMax(A->rmap->n,A->cmap->n);
161   if (!a->mult_work) {
162     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
163   }
164   if (!a->sor_workt) {
165     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
166   }
167   if (!a->sor_work) {
168     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
169   }
170   work = a->mult_work;
171   t    = a->sor_workt;
172   w    = a->sor_work;
173 
174   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
175   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
176 
177   if (flag & SOR_ZERO_INITIAL_GUESS) {
178     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
179       switch (bs) {
180       case 1:
181         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
182         t[0] = b[0];
183         i2     = 1;
184         idiag += 1;
185         for (i=1; i<m; i++) {
186           v  = aa + ai[i];
187           vi = aj + ai[i];
188           nz = diag[i] - ai[i];
189           s[0] = b[i2];
190           for (j=0; j<nz; j++) {
191             xw[0] = x[vi[j]];
192             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
193           }
194           t[i2] = s[0];
195           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
196           x[i2]  = xw[0];
197           idiag += 1;
198           i2    += 1;
199         }
200         break;
201       case 2:
202         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
203         t[0] = b[0]; t[1] = b[1];
204         i2     = 2;
205         idiag += 4;
206         for (i=1; i<m; i++) {
207           v  = aa + 4*ai[i];
208           vi = aj + ai[i];
209           nz = diag[i] - ai[i];
210           s[0] = b[i2]; s[1] = b[i2+1];
211           for (j=0; j<nz; j++) {
212             idx = 2*vi[j];
213             it  = 4*j;
214             xw[0] = x[idx]; xw[1] = x[1+idx];
215             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
216           }
217           t[i2] = s[0]; t[i2+1] = s[1];
218           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
219           x[i2]   = xw[0]; x[i2+1] = xw[1];
220           idiag  += 4;
221           i2     += 2;
222         }
223         break;
224       case 3:
225         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
226         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
227         i2     = 3;
228         idiag += 9;
229         for (i=1; i<m; i++) {
230           v  = aa + 9*ai[i];
231           vi = aj + ai[i];
232           nz = diag[i] - ai[i];
233           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
234           while (nz--) {
235             idx = 3*(*vi++);
236             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
237             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
238             v  += 9;
239           }
240           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
241           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
242           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
243           idiag  += 9;
244           i2     += 3;
245         }
246         break;
247       case 4:
248         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
249         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
250         i2     = 4;
251         idiag += 16;
252         for (i=1; i<m; i++) {
253           v  = aa + 16*ai[i];
254           vi = aj + ai[i];
255           nz = diag[i] - ai[i];
256           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
257           while (nz--) {
258             idx = 4*(*vi++);
259             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
260             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
261             v  += 16;
262           }
263           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
264           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
265           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
266           idiag  += 16;
267           i2     += 4;
268         }
269         break;
270       case 5:
271         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
272         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
273         i2     = 5;
274         idiag += 25;
275         for (i=1; i<m; i++) {
276           v  = aa + 25*ai[i];
277           vi = aj + ai[i];
278           nz = diag[i] - ai[i];
279           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
280           while (nz--) {
281             idx = 5*(*vi++);
282             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
283             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
284             v  += 25;
285           }
286           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
287           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
288           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
289           idiag  += 25;
290           i2     += 5;
291         }
292         break;
293       case 6:
294         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
295         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
296         i2     = 6;
297         idiag += 36;
298         for (i=1; i<m; i++) {
299           v  = aa + 36*ai[i];
300           vi = aj + ai[i];
301           nz = diag[i] - ai[i];
302           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
303           while (nz--) {
304             idx = 6*(*vi++);
305             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
306             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
307             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
308             v  += 36;
309           }
310           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
311           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
312           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
313           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
314           idiag  += 36;
315           i2     += 6;
316         }
317         break;
318       case 7:
319         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
320         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
321         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
322         i2     = 7;
323         idiag += 49;
324         for (i=1; i<m; i++) {
325           v  = aa + 49*ai[i];
326           vi = aj + ai[i];
327           nz = diag[i] - ai[i];
328           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
329           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
330           while (nz--) {
331             idx = 7*(*vi++);
332             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
333             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
334             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
335             v  += 49;
336           }
337           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
338           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
339           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
340           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
341           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
342           idiag  += 49;
343           i2     += 7;
344         }
345         break;
346       default:
347         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
348         ierr = PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
349         i2     = bs;
350         idiag += bs2;
351         for (i=1; i<m; i++) {
352           v  = aa + bs2*ai[i];
353           vi = aj + ai[i];
354           nz = diag[i] - ai[i];
355 
356           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
357           /* copy all rows of x that are needed into contiguous space */
358           workt = work;
359           for (j=0; j<nz; j++) {
360             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
361             workt += bs;
362           }
363           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
364           ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
365           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
366 
367           idiag += bs2;
368           i2    += bs;
369         }
370         break;
371       }
372       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
373       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
374       xb = t;
375     }
376     else xb = b;
377     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
378       idiag = a->idiag+bs2*(a->mbs-1);
379       i2 = bs * (m-1);
380       switch (bs) {
381       case 1:
382         s[0]  = xb[i2];
383         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
384         x[i2] = xw[0];
385         i2   -= 1;
386         for (i=m-2; i>=0; i--) {
387           v  = aa + (diag[i]+1);
388           vi = aj + diag[i] + 1;
389           nz = ai[i+1] - diag[i] - 1;
390           s[0] = xb[i2];
391           for (j=0; j<nz; j++) {
392             xw[0] = x[vi[j]];
393             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
394           }
395           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
396           x[i2]  = xw[0];
397           idiag -= 1;
398           i2    -= 1;
399         }
400         break;
401       case 2:
402         s[0]  = xb[i2]; s[1] = xb[i2+1];
403         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
404         x[i2] = xw[0]; x[i2+1] = xw[1];
405         i2    -= 2;
406         idiag -= 4;
407         for (i=m-2; i>=0; i--) {
408           v  = aa + 4*(diag[i] + 1);
409           vi = aj + diag[i] + 1;
410           nz = ai[i+1] - diag[i] - 1;
411           s[0] = xb[i2]; s[1] = xb[i2+1];
412           for (j=0; j<nz; j++) {
413             idx = 2*vi[j];
414             it  = 4*j;
415             xw[0] = x[idx]; xw[1] = x[1+idx];
416             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
417           }
418           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
419           x[i2]   = xw[0]; x[i2+1] = xw[1];
420           idiag  -= 4;
421           i2     -= 2;
422         }
423         break;
424       case 3:
425         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
426         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
427         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
428         i2    -= 3;
429         idiag -= 9;
430         for (i=m-2; i>=0; i--) {
431           v  = aa + 9*(diag[i]+1);
432           vi = aj + diag[i] + 1;
433           nz = ai[i+1] - diag[i] - 1;
434           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
435           while (nz--) {
436             idx = 3*(*vi++);
437             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
438             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
439             v  += 9;
440           }
441           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
442           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
443           idiag  -= 9;
444           i2     -= 3;
445         }
446         break;
447       case 4:
448         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
449         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
450         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
451         i2    -= 4;
452         idiag -= 16;
453         for (i=m-2; i>=0; i--) {
454           v  = aa + 16*(diag[i]+1);
455           vi = aj + diag[i] + 1;
456           nz = ai[i+1] - diag[i] - 1;
457           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
458           while (nz--) {
459             idx = 4*(*vi++);
460             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
461             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
462             v  += 16;
463           }
464           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
465           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
466           idiag  -= 16;
467           i2     -= 4;
468         }
469         break;
470       case 5:
471         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
472         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
473         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
474         i2    -= 5;
475         idiag -= 25;
476         for (i=m-2; i>=0; i--) {
477           v  = aa + 25*(diag[i]+1);
478           vi = aj + diag[i] + 1;
479           nz = ai[i+1] - diag[i] - 1;
480           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
481           while (nz--) {
482             idx = 5*(*vi++);
483             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
484             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
485             v  += 25;
486           }
487           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
488           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
489           idiag  -= 25;
490           i2     -= 5;
491         }
492         break;
493       case 6:
494         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
495         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
496         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
497         i2    -= 6;
498         idiag -= 36;
499         for (i=m-2; i>=0; i--) {
500           v  = aa + 36*(diag[i]+1);
501           vi = aj + diag[i] + 1;
502           nz = ai[i+1] - diag[i] - 1;
503           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
504           while (nz--) {
505             idx = 6*(*vi++);
506             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
507             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
508             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
509             v  += 36;
510           }
511           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
512           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
513           idiag  -= 36;
514           i2     -= 6;
515         }
516         break;
517       case 7:
518         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
519         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
520         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
521         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
522         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
523         i2    -= 7;
524         idiag -= 49;
525         for (i=m-2; i>=0; i--) {
526           v  = aa + 49*(diag[i]+1);
527           vi = aj + diag[i] + 1;
528           nz = ai[i+1] - diag[i] - 1;
529           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
530           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
531           while (nz--) {
532             idx = 7*(*vi++);
533             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
534             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
535             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
536             v  += 49;
537           }
538           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
539           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
540           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
541           idiag  -= 49;
542           i2     -= 7;
543         }
544         break;
545       default:
546         ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
547         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
548         i2    -= bs;
549         idiag -= bs2;
550         for (i=m-2; i>=0; i--) {
551           v  = aa + bs2*(diag[i]+1);
552           vi = aj + diag[i] + 1;
553           nz = ai[i+1] - diag[i] - 1;
554 
555           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
556           /* copy all rows of x that are needed into contiguous space */
557           workt = work;
558           for (j=0; j<nz; j++) {
559             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
560             workt += bs;
561           }
562           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
563           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
564 
565           idiag -= bs2;
566           i2    -= bs;
567         }
568         break;
569       }
570       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
571     }
572     its--;
573   }
574   while (its--) {
575     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
576       idiag = a->idiag;
577       i2 = 0;
578       switch (bs) {
579       case 1:
580         for (i=0; i<m; i++) {
581           v  = aa + ai[i];
582           vi = aj + ai[i];
583           nz = ai[i+1] - ai[i];
584           s[0] = b[i2];
585           for (j=0; j<nz; j++) {
586             xw[0] = x[vi[j]];
587             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
588           }
589           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
590           x[i2] += xw[0];
591           idiag += 1;
592           i2    += 1;
593         }
594         break;
595       case 2:
596         for (i=0; i<m; i++) {
597           v  = aa + 4*ai[i];
598           vi = aj + ai[i];
599           nz = ai[i+1] - ai[i];
600           s[0] = b[i2]; s[1] = b[i2+1];
601           for (j=0; j<nz; j++) {
602             idx = 2*vi[j];
603             it  = 4*j;
604             xw[0] = x[idx]; xw[1] = x[1+idx];
605             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
606           }
607           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
608           x[i2]  += xw[0]; x[i2+1] += xw[1];
609           idiag  += 4;
610           i2     += 2;
611         }
612         break;
613       case 3:
614         for (i=0; i<m; i++) {
615           v  = aa + 9*ai[i];
616           vi = aj + ai[i];
617           nz = ai[i+1] - ai[i];
618           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
619           while (nz--) {
620             idx = 3*(*vi++);
621             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
622             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
623             v  += 9;
624           }
625           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
626           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
627           idiag  += 9;
628           i2     += 3;
629         }
630         break;
631       case 4:
632         for (i=0; i<m; i++) {
633           v  = aa + 16*ai[i];
634           vi = aj + ai[i];
635           nz = ai[i+1] - ai[i];
636           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
637           while (nz--) {
638             idx = 4*(*vi++);
639             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
640             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
641             v  += 16;
642           }
643           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
644           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
645           idiag  += 16;
646           i2     += 4;
647         }
648         break;
649       case 5:
650         for (i=0; i<m; i++) {
651           v  = aa + 25*ai[i];
652           vi = aj + ai[i];
653           nz = ai[i+1] - ai[i];
654           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
655           while (nz--) {
656             idx = 5*(*vi++);
657             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
658             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
659             v  += 25;
660           }
661           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
662           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
663           idiag  += 25;
664           i2     += 5;
665         }
666         break;
667       case 6:
668         for (i=0; i<m; i++) {
669           v  = aa + 36*ai[i];
670           vi = aj + ai[i];
671           nz = ai[i+1] - ai[i];
672           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
673           while (nz--) {
674             idx = 6*(*vi++);
675             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
676             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
677             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
678             v  += 36;
679           }
680           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
681           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
682           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
683           idiag  += 36;
684           i2     += 6;
685         }
686         break;
687       case 7:
688         for (i=0; i<m; i++) {
689           v  = aa + 49*ai[i];
690           vi = aj + ai[i];
691           nz = ai[i+1] - ai[i];
692           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
693           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
694           while (nz--) {
695             idx = 7*(*vi++);
696             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
697             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
698             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
699             v  += 49;
700           }
701           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
702           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
703           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
704           idiag  += 49;
705           i2     += 7;
706         }
707         break;
708       default:
709         for (i=0; i<m; i++) {
710           v  = aa + bs2*ai[i];
711           vi = aj + ai[i];
712           nz = ai[i+1] - ai[i];
713 
714           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
715           /* copy all rows of x that are needed into contiguous space */
716           workt = work;
717           for (j=0; j<nz; j++) {
718             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
719             workt += bs;
720           }
721           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
722           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
723 
724           idiag += bs2;
725           i2    += bs;
726         }
727         break;
728       }
729       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
730     }
731     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
732       idiag = a->idiag+bs2*(a->mbs-1);
733       i2 = bs * (m-1);
734       switch (bs) {
735       case 1:
736         for (i=m-1; i>=0; i--) {
737           v  = aa + ai[i];
738           vi = aj + ai[i];
739           nz = ai[i+1] - ai[i];
740           s[0] = b[i2];
741           for (j=0; j<nz; j++) {
742             xw[0] = x[vi[j]];
743             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
744           }
745           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
746           x[i2] += xw[0];
747           idiag -= 1;
748           i2    -= 1;
749         }
750         break;
751       case 2:
752         for (i=m-1; i>=0; i--) {
753           v  = aa + 4*ai[i];
754           vi = aj + ai[i];
755           nz = ai[i+1] - ai[i];
756           s[0] = b[i2]; s[1] = b[i2+1];
757           for (j=0; j<nz; j++) {
758             idx = 2*vi[j];
759             it  = 4*j;
760             xw[0] = x[idx]; xw[1] = x[1+idx];
761             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
762           }
763           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
764           x[i2]  += xw[0]; x[i2+1] += xw[1];
765           idiag  -= 4;
766           i2     -= 2;
767         }
768         break;
769       case 3:
770         for (i=m-1; i>=0; i--) {
771           v  = aa + 9*ai[i];
772           vi = aj + ai[i];
773           nz = ai[i+1] - ai[i];
774           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
775           while (nz--) {
776             idx = 3*(*vi++);
777             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
778             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
779             v  += 9;
780           }
781           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
782           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
783           idiag  -= 9;
784           i2     -= 3;
785         }
786         break;
787       case 4:
788         for (i=m-1; i>=0; i--) {
789           v  = aa + 16*ai[i];
790           vi = aj + ai[i];
791           nz = ai[i+1] - ai[i];
792           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
793           while (nz--) {
794             idx = 4*(*vi++);
795             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
796             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
797             v  += 16;
798           }
799           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
800           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
801           idiag  -= 16;
802           i2     -= 4;
803         }
804         break;
805       case 5:
806         for (i=m-1; i>=0; i--) {
807           v  = aa + 25*ai[i];
808           vi = aj + ai[i];
809           nz = ai[i+1] - ai[i];
810           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
811           while (nz--) {
812             idx = 5*(*vi++);
813             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
814             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
815             v  += 25;
816           }
817           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
818           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
819           idiag  -= 25;
820           i2     -= 5;
821         }
822         break;
823       case 6:
824         for (i=m-1; i>=0; i--) {
825           v  = aa + 36*ai[i];
826           vi = aj + ai[i];
827           nz = ai[i+1] - ai[i];
828           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
829           while (nz--) {
830             idx = 6*(*vi++);
831             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
832             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
833             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
834             v  += 36;
835           }
836           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
837           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
838           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
839           idiag  -= 36;
840           i2     -= 6;
841         }
842         break;
843       case 7:
844         for (i=m-1; i>=0; i--) {
845           v  = aa + 49*ai[i];
846           vi = aj + ai[i];
847           nz = ai[i+1] - ai[i];
848           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
849           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
850           while (nz--) {
851             idx = 7*(*vi++);
852             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
853             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
854             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
855             v  += 49;
856           }
857           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
858           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
859           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
860           idiag  -= 49;
861           i2     -= 7;
862         }
863         break;
864       default:
865         for (i=m-1; i>=0; i--) {
866           v  = aa + bs2*ai[i];
867           vi = aj + ai[i];
868           nz = ai[i+1] - ai[i];
869 
870           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
871           /* copy all rows of x that are needed into contiguous space */
872           workt = work;
873           for (j=0; j<nz; j++) {
874             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
875             workt += bs;
876           }
877           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
878           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
879 
880           idiag -= bs2;
881           i2    -= bs;
882         }
883         break;
884       }
885       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
886     }
887   }
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
890   PetscFunctionReturn(0);
891 }
892 
893 
894 /*
895     Special version for direct calls from Fortran (Used in PETSc-fun3d)
896 */
897 #if defined(PETSC_HAVE_FORTRAN_CAPS)
898 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
899 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
900 #define matsetvaluesblocked4_ matsetvaluesblocked4
901 #endif
902 
903 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
904 {
905   Mat               A  = *AA;
906   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
907   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
908   PetscInt          *ai    =a->i,*ailen=a->ilen;
909   PetscInt          *aj    =a->j,stepval,lastcol = -1;
910   const PetscScalar *value = v;
911   MatScalar         *ap,*aa = a->a,*bap;
912 
913   PetscFunctionBegin;
914   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
915   stepval = (n-1)*4;
916   for (k=0; k<m; k++) { /* loop over added rows */
917     row  = im[k];
918     rp   = aj + ai[row];
919     ap   = aa + 16*ai[row];
920     nrow = ailen[row];
921     low  = 0;
922     high = nrow;
923     for (l=0; l<n; l++) { /* loop over added columns */
924       col = in[l];
925       if (col <= lastcol)  low = 0;
926       else                high = nrow;
927       lastcol = col;
928       value   = v + k*(stepval+4 + l)*4;
929       while (high-low > 7) {
930         t = (low+high)/2;
931         if (rp[t] > col) high = t;
932         else             low  = t;
933       }
934       for (i=low; i<high; i++) {
935         if (rp[i] > col) break;
936         if (rp[i] == col) {
937           bap = ap +  16*i;
938           for (ii=0; ii<4; ii++,value+=stepval) {
939             for (jj=ii; jj<16; jj+=4) {
940               bap[jj] += *value++;
941             }
942           }
943           goto noinsert2;
944         }
945       }
946       N = nrow++ - 1;
947       high++; /* added new column index thus must search to one higher than before */
948       /* shift up all the later entries in this row */
949       for (ii=N; ii>=i; ii--) {
950         rp[ii+1] = rp[ii];
951         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
952       }
953       if (N >= i) {
954         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
955       }
956       rp[i] = col;
957       bap   = ap +  16*i;
958       for (ii=0; ii<4; ii++,value+=stepval) {
959         for (jj=ii; jj<16; jj+=4) {
960           bap[jj] = *value++;
961         }
962       }
963       noinsert2:;
964       low = i;
965     }
966     ailen[row] = nrow;
967   }
968   PetscFunctionReturnVoid();
969 }
970 
971 #if defined(PETSC_HAVE_FORTRAN_CAPS)
972 #define matsetvalues4_ MATSETVALUES4
973 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
974 #define matsetvalues4_ matsetvalues4
975 #endif
976 
977 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
978 {
979   Mat         A  = *AA;
980   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
981   PetscInt    *rp,k,low,high,t,ii,row,nrow,i,col,l,N,n = *nn,m = *mm;
982   PetscInt    *ai=a->i,*ailen=a->ilen;
983   PetscInt    *aj=a->j,brow,bcol;
984   PetscInt    ridx,cidx,lastcol = -1;
985   MatScalar   *ap,value,*aa=a->a,*bap;
986 
987   PetscFunctionBegin;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k]; brow = row/4;
990     rp   = aj + ai[brow];
991     ap   = aa + 16*ai[brow];
992     nrow = ailen[brow];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col   = in[l]; bcol = col/4;
997       ridx  = row % 4; cidx = col % 4;
998       value = v[l + k*n];
999       if (col <= lastcol)  low = 0;
1000       else                high = nrow;
1001       lastcol = col;
1002       while (high-low > 7) {
1003         t = (low+high)/2;
1004         if (rp[t] > bcol) high = t;
1005         else              low  = t;
1006       }
1007       for (i=low; i<high; i++) {
1008         if (rp[i] > bcol) break;
1009         if (rp[i] == bcol) {
1010           bap   = ap +  16*i + 4*cidx + ridx;
1011           *bap += value;
1012           goto noinsert1;
1013         }
1014       }
1015       N = nrow++ - 1;
1016       high++; /* added new column thus must search to one higher than before */
1017       /* shift up all the later entries in this row */
1018       for (ii=N; ii>=i; ii--) {
1019         rp[ii+1] = rp[ii];
1020         PetscMemcpy(ap+16*(ii+1),ap+16*(ii),16*sizeof(MatScalar));
1021       }
1022       if (N>=i) {
1023         PetscMemzero(ap+16*i,16*sizeof(MatScalar));
1024       }
1025       rp[i]                    = bcol;
1026       ap[16*i + 4*cidx + ridx] = value;
1027 noinsert1:;
1028       low = i;
1029     }
1030     ailen[brow] = nrow;
1031   }
1032   PetscFunctionReturnVoid();
1033 }
1034 
1035 /*
1036      Checks for missing diagonals
1037 */
1038 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1039 {
1040   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1041   PetscErrorCode ierr;
1042   PetscInt       *diag,*ii = a->i,i;
1043 
1044   PetscFunctionBegin;
1045   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1046   *missing = PETSC_FALSE;
1047   if (A->rmap->n > 0 && !ii) {
1048     *missing = PETSC_TRUE;
1049     if (d) *d = 0;
1050     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1051   } else {
1052     diag = a->diag;
1053     for (i=0; i<a->mbs; i++) {
1054       if (diag[i] >= ii[i+1]) {
1055         *missing = PETSC_TRUE;
1056         if (d) *d = i;
1057         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1058         break;
1059       }
1060     }
1061   }
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1066 {
1067   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1068   PetscErrorCode ierr;
1069   PetscInt       i,j,m = a->mbs;
1070 
1071   PetscFunctionBegin;
1072   if (!a->diag) {
1073     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1074     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1075     a->free_diag = PETSC_TRUE;
1076   }
1077   for (i=0; i<m; i++) {
1078     a->diag[i] = a->i[i+1];
1079     for (j=a->i[i]; j<a->i[i+1]; j++) {
1080       if (a->j[j] == i) {
1081         a->diag[i] = j;
1082         break;
1083       }
1084     }
1085   }
1086   PetscFunctionReturn(0);
1087 }
1088 
1089 
1090 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1091 {
1092   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1093   PetscErrorCode ierr;
1094   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1095   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1096 
1097   PetscFunctionBegin;
1098   *nn = n;
1099   if (!ia) PetscFunctionReturn(0);
1100   if (symmetric) {
1101     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1102     nz   = tia[n];
1103   } else {
1104     tia = a->i; tja = a->j;
1105   }
1106 
1107   if (!blockcompressed && bs > 1) {
1108     (*nn) *= bs;
1109     /* malloc & create the natural set of indices */
1110     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1111     if (n) {
1112       (*ia)[0] = oshift;
1113       for (j=1; j<bs; j++) {
1114         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1115       }
1116     }
1117 
1118     for (i=1; i<n; i++) {
1119       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1120       for (j=1; j<bs; j++) {
1121         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1122       }
1123     }
1124     if (n) {
1125       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1126     }
1127 
1128     if (inja) {
1129       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1130       cnt = 0;
1131       for (i=0; i<n; i++) {
1132         for (j=0; j<bs; j++) {
1133           for (k=tia[i]; k<tia[i+1]; k++) {
1134             for (l=0; l<bs; l++) {
1135               (*ja)[cnt++] = bs*tja[k] + l;
1136             }
1137           }
1138         }
1139       }
1140     }
1141 
1142     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1143       ierr = PetscFree(tia);CHKERRQ(ierr);
1144       ierr = PetscFree(tja);CHKERRQ(ierr);
1145     }
1146   } else if (oshift == 1) {
1147     if (symmetric) {
1148       nz = tia[A->rmap->n/bs];
1149       /*  add 1 to i and j indices */
1150       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1151       *ia = tia;
1152       if (ja) {
1153         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1154         *ja = tja;
1155       }
1156     } else {
1157       nz = a->i[A->rmap->n/bs];
1158       /* malloc space and  add 1 to i and j indices */
1159       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1160       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1161       if (ja) {
1162         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1163         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1164       }
1165     }
1166   } else {
1167     *ia = tia;
1168     if (ja) *ja = tja;
1169   }
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1174 {
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   if (!ia) PetscFunctionReturn(0);
1179   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1180     ierr = PetscFree(*ia);CHKERRQ(ierr);
1181     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1182   }
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1187 {
1188   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1189   PetscErrorCode ierr;
1190 
1191   PetscFunctionBegin;
1192 #if defined(PETSC_USE_LOG)
1193   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1194 #endif
1195   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1196   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1197   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1198   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1199   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1200   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1201   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1202   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1203   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1204   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1205   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1206   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1207   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1208 
1209   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1210   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1211   ierr = PetscFree(A->data);CHKERRQ(ierr);
1212 
1213   ierr = PetscObjectChangeTypeName((PetscObject)A,0);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1223   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1224 #if defined(PETSC_HAVE_HYPRE)
1225   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1226 #endif
1227   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1228   ierr = PetscObjectComposeFunction((PetscObject)A,"MatPtAP_is_seqaij_C",NULL);CHKERRQ(ierr);
1229   PetscFunctionReturn(0);
1230 }
1231 
1232 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1233 {
1234   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1235   PetscErrorCode ierr;
1236 
1237   PetscFunctionBegin;
1238   switch (op) {
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = flg;
1241     break;
1242   case MAT_KEEP_NONZERO_PATTERN:
1243     a->keepnonzeropattern = flg;
1244     break;
1245   case MAT_NEW_NONZERO_LOCATIONS:
1246     a->nonew = (flg ? 0 : 1);
1247     break;
1248   case MAT_NEW_NONZERO_LOCATION_ERR:
1249     a->nonew = (flg ? -1 : 0);
1250     break;
1251   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1252     a->nonew = (flg ? -2 : 0);
1253     break;
1254   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1255     a->nounused = (flg ? -1 : 0);
1256     break;
1257   case MAT_NEW_DIAGONALS:
1258   case MAT_IGNORE_OFF_PROC_ENTRIES:
1259   case MAT_USE_HASH_TABLE:
1260     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1261     break;
1262   case MAT_SPD:
1263   case MAT_SYMMETRIC:
1264   case MAT_STRUCTURALLY_SYMMETRIC:
1265   case MAT_HERMITIAN:
1266   case MAT_SYMMETRY_ETERNAL:
1267   case MAT_SUBMAT_SINGLEIS:
1268   case MAT_STRUCTURE_ONLY:
1269     /* These options are handled directly by MatSetOption() */
1270     break;
1271   default:
1272     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1278 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1279 {
1280   PetscErrorCode ierr;
1281   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1282   MatScalar      *aa_i;
1283   PetscScalar    *v_i;
1284 
1285   PetscFunctionBegin;
1286   bs  = A->rmap->bs;
1287   bs2 = bs*bs;
1288   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1289 
1290   bn  = row/bs;   /* Block number */
1291   bp  = row % bs; /* Block Position */
1292   M   = ai[bn+1] - ai[bn];
1293   *nz = bs*M;
1294 
1295   if (v) {
1296     *v = 0;
1297     if (*nz) {
1298       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1299       for (i=0; i<M; i++) { /* for each block in the block row */
1300         v_i  = *v + i*bs;
1301         aa_i = aa + bs2*(ai[bn] + i);
1302         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1303       }
1304     }
1305   }
1306 
1307   if (idx) {
1308     *idx = 0;
1309     if (*nz) {
1310       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1311       for (i=0; i<M; i++) { /* for each block in the block row */
1312         idx_i = *idx + i*bs;
1313         itmp  = bs*aj[ai[bn] + i];
1314         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1315       }
1316     }
1317   }
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1322 {
1323   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1332 {
1333   PetscErrorCode ierr;
1334 
1335   PetscFunctionBegin;
1336   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1337   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1342 {
1343   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1344   Mat            C;
1345   PetscErrorCode ierr;
1346   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1347   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1348   MatScalar      *ata,*aa=a->a;
1349 
1350   PetscFunctionBegin;
1351   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1352   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1353     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1354 
1355     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1356     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1357     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1358     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1359 
1360     at  = (Mat_SeqBAIJ*)C->data;
1361     ati = at->i;
1362     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1363   } else {
1364     C = *B;
1365     at = (Mat_SeqBAIJ*)C->data;
1366     ati = at->i;
1367   }
1368 
1369   atj = at->j;
1370   ata = at->a;
1371 
1372   /* Copy ati into atfill so we have locations of the next free space in atj */
1373   ierr = PetscMemcpy(atfill,ati,nbs*sizeof(PetscInt));CHKERRQ(ierr);
1374 
1375   /* Walk through A row-wise and mark nonzero entries of A^T. */
1376   for (i=0; i<mbs; i++) {
1377     anzj = ai[i+1] - ai[i];
1378     for (j=0; j<anzj; j++) {
1379       atj[atfill[*aj]] = i;
1380       for (kr=0; kr<bs; kr++) {
1381         for (k=0; k<bs; k++) {
1382           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1383         }
1384       }
1385       atfill[*aj++] += 1;
1386     }
1387   }
1388   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390 
1391   /* Clean up temporary space and complete requests. */
1392   ierr = PetscFree(atfill);CHKERRQ(ierr);
1393 
1394   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1395     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404 {
1405   PetscErrorCode ierr;
1406   Mat            Btrans;
1407 
1408   PetscFunctionBegin;
1409   *f   = PETSC_FALSE;
1410   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 static PetscErrorCode MatView_SeqBAIJ_Binary(Mat A,PetscViewer viewer)
1417 {
1418   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1419   PetscErrorCode ierr;
1420   PetscInt       i,*col_lens,bs = A->rmap->bs,count,*jj,j,k,l,bs2=a->bs2;
1421   int            fd;
1422   PetscScalar    *aa;
1423   FILE           *file;
1424 
1425   PetscFunctionBegin;
1426   ierr        = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1427   ierr        = PetscMalloc1(4+A->rmap->N,&col_lens);CHKERRQ(ierr);
1428   col_lens[0] = MAT_FILE_CLASSID;
1429 
1430   col_lens[1] = A->rmap->N;
1431   col_lens[2] = A->cmap->n;
1432   col_lens[3] = a->nz*bs2;
1433 
1434   /* store lengths of each row and write (including header) to file */
1435   count = 0;
1436   for (i=0; i<a->mbs; i++) {
1437     for (j=0; j<bs; j++) {
1438       col_lens[4+count++] = bs*(a->i[i+1] - a->i[i]);
1439     }
1440   }
1441   ierr = PetscBinaryWrite(fd,col_lens,4+A->rmap->N,PETSC_INT,PETSC_TRUE);CHKERRQ(ierr);
1442   ierr = PetscFree(col_lens);CHKERRQ(ierr);
1443 
1444   /* store column indices (zero start index) */
1445   ierr  = PetscMalloc1((a->nz+1)*bs2,&jj);CHKERRQ(ierr);
1446   count = 0;
1447   for (i=0; i<a->mbs; i++) {
1448     for (j=0; j<bs; j++) {
1449       for (k=a->i[i]; k<a->i[i+1]; k++) {
1450         for (l=0; l<bs; l++) {
1451           jj[count++] = bs*a->j[k] + l;
1452         }
1453       }
1454     }
1455   }
1456   ierr = PetscBinaryWrite(fd,jj,bs2*a->nz,PETSC_INT,PETSC_FALSE);CHKERRQ(ierr);
1457   ierr = PetscFree(jj);CHKERRQ(ierr);
1458 
1459   /* store nonzero values */
1460   ierr  = PetscMalloc1((a->nz+1)*bs2,&aa);CHKERRQ(ierr);
1461   count = 0;
1462   for (i=0; i<a->mbs; i++) {
1463     for (j=0; j<bs; j++) {
1464       for (k=a->i[i]; k<a->i[i+1]; k++) {
1465         for (l=0; l<bs; l++) {
1466           aa[count++] = a->a[bs2*k + l*bs + j];
1467         }
1468       }
1469     }
1470   }
1471   ierr = PetscBinaryWrite(fd,aa,bs2*a->nz,PETSC_SCALAR,PETSC_FALSE);CHKERRQ(ierr);
1472   ierr = PetscFree(aa);CHKERRQ(ierr);
1473 
1474   ierr = PetscViewerBinaryGetInfoPointer(viewer,&file);CHKERRQ(ierr);
1475   if (file) {
1476     fprintf(file,"-matload_block_size %d\n",(int)A->rmap->bs);
1477   }
1478   PetscFunctionReturn(0);
1479 }
1480 
1481 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1482 {
1483   PetscErrorCode ierr;
1484   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1485   PetscInt       i,bs = A->rmap->bs,k;
1486 
1487   PetscFunctionBegin;
1488   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1489   for (i=0; i<a->mbs; i++) {
1490     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1491     for (k=a->i[i]; k<a->i[i+1]; k++) {
1492       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1493     }
1494     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1495   }
1496   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1497   PetscFunctionReturn(0);
1498 }
1499 
1500 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1501 {
1502   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1503   PetscErrorCode    ierr;
1504   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1505   PetscViewerFormat format;
1506 
1507   PetscFunctionBegin;
1508   if (A->structure_only) {
1509     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1510     PetscFunctionReturn(0);
1511   }
1512 
1513   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1514   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1515     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1516   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1517     const char *matname;
1518     Mat        aij;
1519     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1520     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1521     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1522     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1523     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1524   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1525       PetscFunctionReturn(0);
1526   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1527     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1528     for (i=0; i<a->mbs; i++) {
1529       for (j=0; j<bs; j++) {
1530         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1531         for (k=a->i[i]; k<a->i[i+1]; k++) {
1532           for (l=0; l<bs; l++) {
1533 #if defined(PETSC_USE_COMPLEX)
1534             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1535               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1536                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1537             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1539                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1540             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1541               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1542             }
1543 #else
1544             if (a->a[bs2*k + l*bs + j] != 0.0) {
1545               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1546             }
1547 #endif
1548           }
1549         }
1550         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1551       }
1552     }
1553     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1554   } else {
1555     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1556     for (i=0; i<a->mbs; i++) {
1557       for (j=0; j<bs; j++) {
1558         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1559         for (k=a->i[i]; k<a->i[i+1]; k++) {
1560           for (l=0; l<bs; l++) {
1561 #if defined(PETSC_USE_COMPLEX)
1562             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1563               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1564                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1565             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1566               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1567                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1568             } else {
1569               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1570             }
1571 #else
1572             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1573 #endif
1574           }
1575         }
1576         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1577       }
1578     }
1579     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1580   }
1581   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1582   PetscFunctionReturn(0);
1583 }
1584 
1585 #include <petscdraw.h>
1586 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1587 {
1588   Mat               A = (Mat) Aa;
1589   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1590   PetscErrorCode    ierr;
1591   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1592   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1593   MatScalar         *aa;
1594   PetscViewer       viewer;
1595   PetscViewerFormat format;
1596 
1597   PetscFunctionBegin;
1598   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1599   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1600   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1601 
1602   /* loop over matrix elements drawing boxes */
1603 
1604   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1605     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1606     /* Blue for negative, Cyan for zero and  Red for positive */
1607     color = PETSC_DRAW_BLUE;
1608     for (i=0,row=0; i<mbs; i++,row+=bs) {
1609       for (j=a->i[i]; j<a->i[i+1]; j++) {
1610         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1611         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1612         aa  = a->a + j*bs2;
1613         for (k=0; k<bs; k++) {
1614           for (l=0; l<bs; l++) {
1615             if (PetscRealPart(*aa++) >=  0.) continue;
1616             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1617           }
1618         }
1619       }
1620     }
1621     color = PETSC_DRAW_CYAN;
1622     for (i=0,row=0; i<mbs; i++,row+=bs) {
1623       for (j=a->i[i]; j<a->i[i+1]; j++) {
1624         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1625         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1626         aa  = a->a + j*bs2;
1627         for (k=0; k<bs; k++) {
1628           for (l=0; l<bs; l++) {
1629             if (PetscRealPart(*aa++) != 0.) continue;
1630             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1631           }
1632         }
1633       }
1634     }
1635     color = PETSC_DRAW_RED;
1636     for (i=0,row=0; i<mbs; i++,row+=bs) {
1637       for (j=a->i[i]; j<a->i[i+1]; j++) {
1638         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1639         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1640         aa  = a->a + j*bs2;
1641         for (k=0; k<bs; k++) {
1642           for (l=0; l<bs; l++) {
1643             if (PetscRealPart(*aa++) <= 0.) continue;
1644             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1645           }
1646         }
1647       }
1648     }
1649     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1650   } else {
1651     /* use contour shading to indicate magnitude of values */
1652     /* first determine max of all nonzero values */
1653     PetscReal minv = 0.0, maxv = 0.0;
1654     PetscDraw popup;
1655 
1656     for (i=0; i<a->nz*a->bs2; i++) {
1657       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1658     }
1659     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1660     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1661     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1662 
1663     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1664     for (i=0,row=0; i<mbs; i++,row+=bs) {
1665       for (j=a->i[i]; j<a->i[i+1]; j++) {
1666         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1667         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1668         aa  = a->a + j*bs2;
1669         for (k=0; k<bs; k++) {
1670           for (l=0; l<bs; l++) {
1671             MatScalar v = *aa++;
1672             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1673             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1674           }
1675         }
1676       }
1677     }
1678     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1679   }
1680   PetscFunctionReturn(0);
1681 }
1682 
1683 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1684 {
1685   PetscErrorCode ierr;
1686   PetscReal      xl,yl,xr,yr,w,h;
1687   PetscDraw      draw;
1688   PetscBool      isnull;
1689 
1690   PetscFunctionBegin;
1691   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1692   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1693   if (isnull) PetscFunctionReturn(0);
1694 
1695   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1696   xr  += w;          yr += h;        xl = -w;     yl = -h;
1697   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1698   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1699   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1700   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1701   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 
1705 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1706 {
1707   PetscErrorCode ierr;
1708   PetscBool      iascii,isbinary,isdraw;
1709 
1710   PetscFunctionBegin;
1711   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1712   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1713   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1714   if (iascii) {
1715     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1716   } else if (isbinary) {
1717     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1718   } else if (isdraw) {
1719     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1720   } else {
1721     Mat B;
1722     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1723     ierr = MatView(B,viewer);CHKERRQ(ierr);
1724     ierr = MatDestroy(&B);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 
1729 
1730 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1731 {
1732   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1733   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1734   PetscInt    *ai = a->i,*ailen = a->ilen;
1735   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1736   MatScalar   *ap,*aa = a->a;
1737 
1738   PetscFunctionBegin;
1739   for (k=0; k<m; k++) { /* loop over rows */
1740     row = im[k]; brow = row/bs;
1741     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1742     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1743     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1744     nrow = ailen[brow];
1745     for (l=0; l<n; l++) { /* loop over columns */
1746       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1747       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1748       col  = in[l];
1749       bcol = col/bs;
1750       cidx = col%bs;
1751       ridx = row%bs;
1752       high = nrow;
1753       low  = 0; /* assume unsorted */
1754       while (high-low > 5) {
1755         t = (low+high)/2;
1756         if (rp[t] > bcol) high = t;
1757         else             low  = t;
1758       }
1759       for (i=low; i<high; i++) {
1760         if (rp[i] > bcol) break;
1761         if (rp[i] == bcol) {
1762           *v++ = ap[bs2*i+bs*cidx+ridx];
1763           goto finished;
1764         }
1765       }
1766       *v++ = 0.0;
1767 finished:;
1768     }
1769   }
1770   PetscFunctionReturn(0);
1771 }
1772 
1773 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1774 {
1775   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1776   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1777   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1778   PetscErrorCode    ierr;
1779   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1780   PetscBool         roworiented=a->roworiented;
1781   const PetscScalar *value     = v;
1782   MatScalar         *ap=NULL,*aa = a->a,*bap;
1783 
1784   PetscFunctionBegin;
1785   if (roworiented) {
1786     stepval = (n-1)*bs;
1787   } else {
1788     stepval = (m-1)*bs;
1789   }
1790   for (k=0; k<m; k++) { /* loop over added rows */
1791     row = im[k];
1792     if (row < 0) continue;
1793 #if defined(PETSC_USE_DEBUG)
1794     if (row >= a->mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1795 #endif
1796     rp   = aj + ai[row];
1797     if (!A->structure_only) ap = aa + bs2*ai[row];
1798     rmax = imax[row];
1799     nrow = ailen[row];
1800     low  = 0;
1801     high = nrow;
1802     for (l=0; l<n; l++) { /* loop over added columns */
1803       if (in[l] < 0) continue;
1804 #if defined(PETSC_USE_DEBUG)
1805       if (in[l] >= a->nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1806 #endif
1807       col = in[l];
1808       if (!A->structure_only) {
1809         if (roworiented) {
1810           value = v + (k*(stepval+bs) + l)*bs;
1811         } else {
1812           value = v + (l*(stepval+bs) + k)*bs;
1813         }
1814       }
1815       if (col <= lastcol) low = 0;
1816       else high = nrow;
1817       lastcol = col;
1818       while (high-low > 7) {
1819         t = (low+high)/2;
1820         if (rp[t] > col) high = t;
1821         else             low  = t;
1822       }
1823       for (i=low; i<high; i++) {
1824         if (rp[i] > col) break;
1825         if (rp[i] == col) {
1826           if (A->structure_only) goto noinsert2;
1827           bap = ap +  bs2*i;
1828           if (roworiented) {
1829             if (is == ADD_VALUES) {
1830               for (ii=0; ii<bs; ii++,value+=stepval) {
1831                 for (jj=ii; jj<bs2; jj+=bs) {
1832                   bap[jj] += *value++;
1833                 }
1834               }
1835             } else {
1836               for (ii=0; ii<bs; ii++,value+=stepval) {
1837                 for (jj=ii; jj<bs2; jj+=bs) {
1838                   bap[jj] = *value++;
1839                 }
1840               }
1841             }
1842           } else {
1843             if (is == ADD_VALUES) {
1844               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1845                 for (jj=0; jj<bs; jj++) {
1846                   bap[jj] += value[jj];
1847                 }
1848                 bap += bs;
1849               }
1850             } else {
1851               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1852                 for (jj=0; jj<bs; jj++) {
1853                   bap[jj]  = value[jj];
1854                 }
1855                 bap += bs;
1856               }
1857             }
1858           }
1859           goto noinsert2;
1860         }
1861       }
1862       if (nonew == 1) goto noinsert2;
1863       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1864       if (A->structure_only) {
1865         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1866       } else {
1867         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1868       }
1869       N = nrow++ - 1; high++;
1870       /* shift up all the later entries in this row */
1871       for (ii=N; ii>=i; ii--) {
1872         rp[ii+1] = rp[ii];
1873         if (!A->structure_only) {
1874           ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
1875         }
1876       }
1877       if (N >= i && !A->structure_only) {
1878         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1879       }
1880 
1881       rp[i] = col;
1882       if (!A->structure_only) {
1883         bap   = ap +  bs2*i;
1884         if (roworiented) {
1885           for (ii=0; ii<bs; ii++,value+=stepval) {
1886             for (jj=ii; jj<bs2; jj+=bs) {
1887               bap[jj] = *value++;
1888             }
1889           }
1890         } else {
1891           for (ii=0; ii<bs; ii++,value+=stepval) {
1892             for (jj=0; jj<bs; jj++) {
1893               *bap++ = *value++;
1894             }
1895           }
1896         }
1897       }
1898 noinsert2:;
1899       low = i;
1900     }
1901     ailen[row] = nrow;
1902   }
1903   PetscFunctionReturn(0);
1904 }
1905 
1906 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1907 {
1908   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1909   PetscInt       fshift = 0,i,j,*ai = a->i,*aj = a->j,*imax = a->imax;
1910   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1911   PetscErrorCode ierr;
1912   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1913   MatScalar      *aa  = a->a,*ap;
1914   PetscReal      ratio=0.6;
1915 
1916   PetscFunctionBegin;
1917   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1918 
1919   if (m) rmax = ailen[0];
1920   for (i=1; i<mbs; i++) {
1921     /* move each row back by the amount of empty slots (fshift) before it*/
1922     fshift += imax[i-1] - ailen[i-1];
1923     rmax    = PetscMax(rmax,ailen[i]);
1924     if (fshift) {
1925       ip = aj + ai[i]; ap = aa + bs2*ai[i];
1926       N  = ailen[i];
1927       for (j=0; j<N; j++) {
1928         ip[j-fshift] = ip[j];
1929         if (!A->structure_only) {
1930           ierr = PetscMemcpy(ap+(j-fshift)*bs2,ap+j*bs2,bs2*sizeof(MatScalar));CHKERRQ(ierr);
1931         }
1932       }
1933     }
1934     ai[i] = ai[i-1] + ailen[i-1];
1935   }
1936   if (mbs) {
1937     fshift += imax[mbs-1] - ailen[mbs-1];
1938     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1939   }
1940 
1941   /* reset ilen and imax for each row */
1942   a->nonzerorowcnt = 0;
1943   if (A->structure_only) {
1944     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1945   } else { /* !A->structure_only */
1946     for (i=0; i<mbs; i++) {
1947       ailen[i] = imax[i] = ai[i+1] - ai[i];
1948       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1949     }
1950   }
1951   a->nz = ai[mbs];
1952 
1953   /* diagonals may have moved, so kill the diagonal pointers */
1954   a->idiagvalid = PETSC_FALSE;
1955   if (fshift && a->diag) {
1956     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1957     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1958     a->diag = 0;
1959   }
1960   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1961   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1962   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1963   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1964 
1965   A->info.mallocs    += a->reallocs;
1966   a->reallocs         = 0;
1967   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1968   a->rmax             = rmax;
1969 
1970   if (!A->structure_only) {
1971     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1972   }
1973   PetscFunctionReturn(0);
1974 }
1975 
1976 /*
1977    This function returns an array of flags which indicate the locations of contiguous
1978    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1979    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1980    Assume: sizes should be long enough to hold all the values.
1981 */
1982 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1983 {
1984   PetscInt  i,j,k,row;
1985   PetscBool flg;
1986 
1987   PetscFunctionBegin;
1988   for (i=0,j=0; i<n; j++) {
1989     row = idx[i];
1990     if (row%bs!=0) { /* Not the begining of a block */
1991       sizes[j] = 1;
1992       i++;
1993     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1994       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1995       i++;
1996     } else { /* Begining of the block, so check if the complete block exists */
1997       flg = PETSC_TRUE;
1998       for (k=1; k<bs; k++) {
1999         if (row+k != idx[i+k]) { /* break in the block */
2000           flg = PETSC_FALSE;
2001           break;
2002         }
2003       }
2004       if (flg) { /* No break in the bs */
2005         sizes[j] = bs;
2006         i       += bs;
2007       } else {
2008         sizes[j] = 1;
2009         i++;
2010       }
2011     }
2012   }
2013   *bs_max = j;
2014   PetscFunctionReturn(0);
2015 }
2016 
2017 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2018 {
2019   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2020   PetscErrorCode    ierr;
2021   PetscInt          i,j,k,count,*rows;
2022   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2023   PetscScalar       zero = 0.0;
2024   MatScalar         *aa;
2025   const PetscScalar *xx;
2026   PetscScalar       *bb;
2027 
2028   PetscFunctionBegin;
2029   /* fix right hand side if needed */
2030   if (x && b) {
2031     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2032     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2033     for (i=0; i<is_n; i++) {
2034       bb[is_idx[i]] = diag*xx[is_idx[i]];
2035     }
2036     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2037     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2038   }
2039 
2040   /* Make a copy of the IS and  sort it */
2041   /* allocate memory for rows,sizes */
2042   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2043 
2044   /* copy IS values to rows, and sort them */
2045   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2046   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2047 
2048   if (baij->keepnonzeropattern) {
2049     for (i=0; i<is_n; i++) sizes[i] = 1;
2050     bs_max          = is_n;
2051   } else {
2052     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2053     A->nonzerostate++;
2054   }
2055 
2056   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2057     row = rows[j];
2058     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2059     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2060     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2061     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2062       if (diag != (PetscScalar)0.0) {
2063         if (baij->ilen[row/bs] > 0) {
2064           baij->ilen[row/bs]       = 1;
2065           baij->j[baij->i[row/bs]] = row/bs;
2066 
2067           ierr = PetscMemzero(aa,count*bs*sizeof(MatScalar));CHKERRQ(ierr);
2068         }
2069         /* Now insert all the diagonal values for this bs */
2070         for (k=0; k<bs; k++) {
2071           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2072         }
2073       } else { /* (diag == 0.0) */
2074         baij->ilen[row/bs] = 0;
2075       } /* end (diag == 0.0) */
2076     } else { /* (sizes[i] != bs) */
2077 #if defined(PETSC_USE_DEBUG)
2078       if (sizes[i] != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2079 #endif
2080       for (k=0; k<count; k++) {
2081         aa[0] =  zero;
2082         aa   += bs;
2083       }
2084       if (diag != (PetscScalar)0.0) {
2085         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2086       }
2087     }
2088   }
2089 
2090   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2091   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2092   PetscFunctionReturn(0);
2093 }
2094 
2095 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2096 {
2097   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2098   PetscErrorCode    ierr;
2099   PetscInt          i,j,k,count;
2100   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2101   PetscScalar       zero = 0.0;
2102   MatScalar         *aa;
2103   const PetscScalar *xx;
2104   PetscScalar       *bb;
2105   PetscBool         *zeroed,vecs = PETSC_FALSE;
2106 
2107   PetscFunctionBegin;
2108   /* fix right hand side if needed */
2109   if (x && b) {
2110     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2111     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2112     vecs = PETSC_TRUE;
2113   }
2114 
2115   /* zero the columns */
2116   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2117   for (i=0; i<is_n; i++) {
2118     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2119     zeroed[is_idx[i]] = PETSC_TRUE;
2120   }
2121   for (i=0; i<A->rmap->N; i++) {
2122     if (!zeroed[i]) {
2123       row = i/bs;
2124       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2125         for (k=0; k<bs; k++) {
2126           col = bs*baij->j[j] + k;
2127           if (zeroed[col]) {
2128             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2129             if (vecs) bb[i] -= aa[0]*xx[col];
2130             aa[0] = 0.0;
2131           }
2132         }
2133       }
2134     } else if (vecs) bb[i] = diag*xx[i];
2135   }
2136   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2137   if (vecs) {
2138     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2139     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2140   }
2141 
2142   /* zero the rows */
2143   for (i=0; i<is_n; i++) {
2144     row   = is_idx[i];
2145     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2146     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2147     for (k=0; k<count; k++) {
2148       aa[0] =  zero;
2149       aa   += bs;
2150     }
2151     if (diag != (PetscScalar)0.0) {
2152       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2153     }
2154   }
2155   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2156   PetscFunctionReturn(0);
2157 }
2158 
2159 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2160 {
2161   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2162   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2163   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2164   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2165   PetscErrorCode ierr;
2166   PetscInt       ridx,cidx,bs2=a->bs2;
2167   PetscBool      roworiented=a->roworiented;
2168   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2169 
2170   PetscFunctionBegin;
2171   for (k=0; k<m; k++) { /* loop over added rows */
2172     row  = im[k];
2173     brow = row/bs;
2174     if (row < 0) continue;
2175 #if defined(PETSC_USE_DEBUG)
2176     if (row >= A->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2177 #endif
2178     rp   = aj + ai[brow];
2179     if (!A->structure_only) ap = aa + bs2*ai[brow];
2180     rmax = imax[brow];
2181     nrow = ailen[brow];
2182     low  = 0;
2183     high = nrow;
2184     for (l=0; l<n; l++) { /* loop over added columns */
2185       if (in[l] < 0) continue;
2186 #if defined(PETSC_USE_DEBUG)
2187       if (in[l] >= A->cmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2188 #endif
2189       col  = in[l]; bcol = col/bs;
2190       ridx = row % bs; cidx = col % bs;
2191       if (!A->structure_only) {
2192         if (roworiented) {
2193           value = v[l + k*n];
2194         } else {
2195           value = v[k + l*m];
2196         }
2197       }
2198       if (col <= lastcol) low = 0; else high = nrow;
2199       lastcol = col;
2200       while (high-low > 7) {
2201         t = (low+high)/2;
2202         if (rp[t] > bcol) high = t;
2203         else              low  = t;
2204       }
2205       for (i=low; i<high; i++) {
2206         if (rp[i] > bcol) break;
2207         if (rp[i] == bcol) {
2208           bap = ap +  bs2*i + bs*cidx + ridx;
2209           if (!A->structure_only) {
2210             if (is == ADD_VALUES) *bap += value;
2211             else                  *bap  = value;
2212           }
2213           goto noinsert1;
2214         }
2215       }
2216       if (nonew == 1) goto noinsert1;
2217       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2218       if (A->structure_only) {
2219         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2220       } else {
2221         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2222       }
2223       N = nrow++ - 1; high++;
2224       /* shift up all the later entries in this row */
2225       for (ii=N; ii>=i; ii--) {
2226         rp[ii+1] = rp[ii];
2227         if (!A->structure_only) {
2228           ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr);
2229         }
2230       }
2231       if (N>=i && !A->structure_only) {
2232         ierr = PetscMemzero(ap+bs2*i,bs2*sizeof(MatScalar));CHKERRQ(ierr);
2233       }
2234       rp[i]                      = bcol;
2235       if (!A->structure_only) ap[bs2*i + bs*cidx + ridx] = value;
2236       a->nz++;
2237       A->nonzerostate++;
2238 noinsert1:;
2239       low = i;
2240     }
2241     ailen[brow] = nrow;
2242   }
2243   PetscFunctionReturn(0);
2244 }
2245 
2246 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2247 {
2248   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2249   Mat            outA;
2250   PetscErrorCode ierr;
2251   PetscBool      row_identity,col_identity;
2252 
2253   PetscFunctionBegin;
2254   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2255   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2256   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2257   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2258 
2259   outA            = inA;
2260   inA->factortype = MAT_FACTOR_LU;
2261   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2262   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2263 
2264   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2265 
2266   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2267   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2268   a->row = row;
2269   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2270   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2271   a->col = col;
2272 
2273   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2274   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2275   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2276   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2277 
2278   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2279   if (!a->solve_work) {
2280     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2281     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2282   }
2283   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2288 {
2289   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2290   PetscInt    i,nz,mbs;
2291 
2292   PetscFunctionBegin;
2293   nz  = baij->maxnz;
2294   mbs = baij->mbs;
2295   for (i=0; i<nz; i++) {
2296     baij->j[i] = indices[i];
2297   }
2298   baij->nz = nz;
2299   for (i=0; i<mbs; i++) {
2300     baij->ilen[i] = baij->imax[i];
2301   }
2302   PetscFunctionReturn(0);
2303 }
2304 
2305 /*@
2306     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2307        in the matrix.
2308 
2309   Input Parameters:
2310 +  mat - the SeqBAIJ matrix
2311 -  indices - the column indices
2312 
2313   Level: advanced
2314 
2315   Notes:
2316     This can be called if you have precomputed the nonzero structure of the
2317   matrix and want to provide it to the matrix object to improve the performance
2318   of the MatSetValues() operation.
2319 
2320     You MUST have set the correct numbers of nonzeros per row in the call to
2321   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2322 
2323     MUST be called before any calls to MatSetValues();
2324 
2325 @*/
2326 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2327 {
2328   PetscErrorCode ierr;
2329 
2330   PetscFunctionBegin;
2331   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2332   PetscValidPointer(indices,2);
2333   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2334   PetscFunctionReturn(0);
2335 }
2336 
2337 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2338 {
2339   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2340   PetscErrorCode ierr;
2341   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2342   PetscReal      atmp;
2343   PetscScalar    *x,zero = 0.0;
2344   MatScalar      *aa;
2345   PetscInt       ncols,brow,krow,kcol;
2346 
2347   PetscFunctionBegin;
2348   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2349   bs  = A->rmap->bs;
2350   aa  = a->a;
2351   ai  = a->i;
2352   aj  = a->j;
2353   mbs = a->mbs;
2354 
2355   ierr = VecSet(v,zero);CHKERRQ(ierr);
2356   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2357   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2358   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2359   for (i=0; i<mbs; i++) {
2360     ncols = ai[1] - ai[0]; ai++;
2361     brow  = bs*i;
2362     for (j=0; j<ncols; j++) {
2363       for (kcol=0; kcol<bs; kcol++) {
2364         for (krow=0; krow<bs; krow++) {
2365           atmp = PetscAbsScalar(*aa);aa++;
2366           row  = brow + krow;   /* row index */
2367           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2368         }
2369       }
2370       aj++;
2371     }
2372   }
2373   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2378 {
2379   PetscErrorCode ierr;
2380 
2381   PetscFunctionBegin;
2382   /* If the two matrices have the same copy implementation, use fast copy. */
2383   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2384     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2385     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2386     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2387 
2388     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2389     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2390     ierr = PetscMemcpy(b->a,a->a,(bs2*a->i[ambs])*sizeof(PetscScalar));CHKERRQ(ierr);
2391     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2392   } else {
2393     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2394   }
2395   PetscFunctionReturn(0);
2396 }
2397 
2398 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2399 {
2400   PetscErrorCode ierr;
2401 
2402   PetscFunctionBegin;
2403   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0);CHKERRQ(ierr);
2404   PetscFunctionReturn(0);
2405 }
2406 
2407 PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2408 {
2409   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2410 
2411   PetscFunctionBegin;
2412   *array = a->a;
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2417 {
2418   PetscFunctionBegin;
2419   PetscFunctionReturn(0);
2420 }
2421 
2422 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2423 {
2424   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2425   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2426   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2427   PetscErrorCode ierr;
2428 
2429   PetscFunctionBegin;
2430   /* Set the number of nonzeros in the new matrix */
2431   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2432   PetscFunctionReturn(0);
2433 }
2434 
2435 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2436 {
2437   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2438   PetscErrorCode ierr;
2439   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2440   PetscBLASInt   one=1;
2441 
2442   PetscFunctionBegin;
2443   if (str == SAME_NONZERO_PATTERN) {
2444     PetscScalar  alpha = a;
2445     PetscBLASInt bnz;
2446     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2447     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2448     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2449   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2450     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2451   } else {
2452     Mat      B;
2453     PetscInt *nnz;
2454     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2455     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2456     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2457     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2458     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2459     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2460     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2461     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2462     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2463     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2464     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2465     ierr = PetscFree(nnz);CHKERRQ(ierr);
2466   }
2467   PetscFunctionReturn(0);
2468 }
2469 
2470 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2471 {
2472   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2473   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2474   MatScalar   *aa = a->a;
2475 
2476   PetscFunctionBegin;
2477   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2478   PetscFunctionReturn(0);
2479 }
2480 
2481 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2482 {
2483   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2484   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2485   MatScalar   *aa = a->a;
2486 
2487   PetscFunctionBegin;
2488   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 /*
2493     Code almost idential to MatGetColumnIJ_SeqAIJ() should share common code
2494 */
2495 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2496 {
2497   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2498   PetscErrorCode ierr;
2499   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2500   PetscInt       nz = a->i[m],row,*jj,mr,col;
2501 
2502   PetscFunctionBegin;
2503   *nn = n;
2504   if (!ia) PetscFunctionReturn(0);
2505   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2506   else {
2507     ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2508     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2509     ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2510     jj   = a->j;
2511     for (i=0; i<nz; i++) {
2512       collengths[jj[i]]++;
2513     }
2514     cia[0] = oshift;
2515     for (i=0; i<n; i++) {
2516       cia[i+1] = cia[i] + collengths[i];
2517     }
2518     ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2519     jj   = a->j;
2520     for (row=0; row<m; row++) {
2521       mr = a->i[row+1] - a->i[row];
2522       for (i=0; i<mr; i++) {
2523         col = *jj++;
2524 
2525         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2526       }
2527     }
2528     ierr = PetscFree(collengths);CHKERRQ(ierr);
2529     *ia  = cia; *ja = cja;
2530   }
2531   PetscFunctionReturn(0);
2532 }
2533 
2534 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2535 {
2536   PetscErrorCode ierr;
2537 
2538   PetscFunctionBegin;
2539   if (!ia) PetscFunctionReturn(0);
2540   ierr = PetscFree(*ia);CHKERRQ(ierr);
2541   ierr = PetscFree(*ja);CHKERRQ(ierr);
2542   PetscFunctionReturn(0);
2543 }
2544 
2545 /*
2546  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2547  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2548  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2549  */
2550 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2551 {
2552   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2553   PetscErrorCode ierr;
2554   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2555   PetscInt       nz = a->i[m],row,*jj,mr,col;
2556   PetscInt       *cspidx;
2557 
2558   PetscFunctionBegin;
2559   *nn = n;
2560   if (!ia) PetscFunctionReturn(0);
2561 
2562   ierr = PetscCalloc1(n+1,&collengths);CHKERRQ(ierr);
2563   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2564   ierr = PetscMalloc1(nz+1,&cja);CHKERRQ(ierr);
2565   ierr = PetscMalloc1(nz+1,&cspidx);CHKERRQ(ierr);
2566   jj   = a->j;
2567   for (i=0; i<nz; i++) {
2568     collengths[jj[i]]++;
2569   }
2570   cia[0] = oshift;
2571   for (i=0; i<n; i++) {
2572     cia[i+1] = cia[i] + collengths[i];
2573   }
2574   ierr = PetscMemzero(collengths,n*sizeof(PetscInt));CHKERRQ(ierr);
2575   jj   = a->j;
2576   for (row=0; row<m; row++) {
2577     mr = a->i[row+1] - a->i[row];
2578     for (i=0; i<mr; i++) {
2579       col = *jj++;
2580       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2581       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2582     }
2583   }
2584   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2585   *ia    = cia; *ja = cja;
2586   *spidx = cspidx;
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2591 {
2592   PetscErrorCode ierr;
2593 
2594   PetscFunctionBegin;
2595   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2596   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2597   PetscFunctionReturn(0);
2598 }
2599 
2600 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2601 {
2602   PetscErrorCode ierr;
2603   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2604 
2605   PetscFunctionBegin;
2606   if (!Y->preallocated || !aij->nz) {
2607     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2608   }
2609   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2610   PetscFunctionReturn(0);
2611 }
2612 
2613 /* -------------------------------------------------------------------*/
2614 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2615                                        MatGetRow_SeqBAIJ,
2616                                        MatRestoreRow_SeqBAIJ,
2617                                        MatMult_SeqBAIJ_N,
2618                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2619                                        MatMultTranspose_SeqBAIJ,
2620                                        MatMultTransposeAdd_SeqBAIJ,
2621                                        0,
2622                                        0,
2623                                        0,
2624                                /* 10*/ 0,
2625                                        MatLUFactor_SeqBAIJ,
2626                                        0,
2627                                        0,
2628                                        MatTranspose_SeqBAIJ,
2629                                /* 15*/ MatGetInfo_SeqBAIJ,
2630                                        MatEqual_SeqBAIJ,
2631                                        MatGetDiagonal_SeqBAIJ,
2632                                        MatDiagonalScale_SeqBAIJ,
2633                                        MatNorm_SeqBAIJ,
2634                                /* 20*/ 0,
2635                                        MatAssemblyEnd_SeqBAIJ,
2636                                        MatSetOption_SeqBAIJ,
2637                                        MatZeroEntries_SeqBAIJ,
2638                                /* 24*/ MatZeroRows_SeqBAIJ,
2639                                        0,
2640                                        0,
2641                                        0,
2642                                        0,
2643                                /* 29*/ MatSetUp_SeqBAIJ,
2644                                        0,
2645                                        0,
2646                                        0,
2647                                        0,
2648                                /* 34*/ MatDuplicate_SeqBAIJ,
2649                                        0,
2650                                        0,
2651                                        MatILUFactor_SeqBAIJ,
2652                                        0,
2653                                /* 39*/ MatAXPY_SeqBAIJ,
2654                                        MatCreateSubMatrices_SeqBAIJ,
2655                                        MatIncreaseOverlap_SeqBAIJ,
2656                                        MatGetValues_SeqBAIJ,
2657                                        MatCopy_SeqBAIJ,
2658                                /* 44*/ 0,
2659                                        MatScale_SeqBAIJ,
2660                                        MatShift_SeqBAIJ,
2661                                        0,
2662                                        MatZeroRowsColumns_SeqBAIJ,
2663                                /* 49*/ 0,
2664                                        MatGetRowIJ_SeqBAIJ,
2665                                        MatRestoreRowIJ_SeqBAIJ,
2666                                        MatGetColumnIJ_SeqBAIJ,
2667                                        MatRestoreColumnIJ_SeqBAIJ,
2668                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2669                                        0,
2670                                        0,
2671                                        0,
2672                                        MatSetValuesBlocked_SeqBAIJ,
2673                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2674                                        MatDestroy_SeqBAIJ,
2675                                        MatView_SeqBAIJ,
2676                                        0,
2677                                        0,
2678                                /* 64*/ 0,
2679                                        0,
2680                                        0,
2681                                        0,
2682                                        0,
2683                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2684                                        0,
2685                                        MatConvert_Basic,
2686                                        0,
2687                                        0,
2688                                /* 74*/ 0,
2689                                        MatFDColoringApply_BAIJ,
2690                                        0,
2691                                        0,
2692                                        0,
2693                                /* 79*/ 0,
2694                                        0,
2695                                        0,
2696                                        0,
2697                                        MatLoad_SeqBAIJ,
2698                                /* 84*/ 0,
2699                                        0,
2700                                        0,
2701                                        0,
2702                                        0,
2703                                /* 89*/ 0,
2704                                        0,
2705                                        0,
2706                                        0,
2707                                        0,
2708                                /* 94*/ 0,
2709                                        0,
2710                                        0,
2711                                        0,
2712                                        0,
2713                                /* 99*/ 0,
2714                                        0,
2715                                        0,
2716                                        0,
2717                                        0,
2718                                /*104*/ 0,
2719                                        MatRealPart_SeqBAIJ,
2720                                        MatImaginaryPart_SeqBAIJ,
2721                                        0,
2722                                        0,
2723                                /*109*/ 0,
2724                                        0,
2725                                        0,
2726                                        0,
2727                                        MatMissingDiagonal_SeqBAIJ,
2728                                /*114*/ 0,
2729                                        0,
2730                                        0,
2731                                        0,
2732                                        0,
2733                                /*119*/ 0,
2734                                        0,
2735                                        MatMultHermitianTranspose_SeqBAIJ,
2736                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2737                                        0,
2738                                /*124*/ 0,
2739                                        0,
2740                                        MatInvertBlockDiagonal_SeqBAIJ,
2741                                        0,
2742                                        0,
2743                                /*129*/ 0,
2744                                        0,
2745                                        0,
2746                                        0,
2747                                        0,
2748                                /*134*/ 0,
2749                                        0,
2750                                        0,
2751                                        0,
2752                                        0,
2753                                /*139*/ MatSetBlockSizes_Default,
2754                                        0,
2755                                        0,
2756                                        MatFDColoringSetUp_SeqXAIJ,
2757                                        0,
2758                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2759                                        MatDestroySubMatrices_SeqBAIJ
2760 };
2761 
2762 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2763 {
2764   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2765   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2766   PetscErrorCode ierr;
2767 
2768   PetscFunctionBegin;
2769   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2770 
2771   /* allocate space for values if not already there */
2772   if (!aij->saved_values) {
2773     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2774     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2775   }
2776 
2777   /* copy values over */
2778   ierr = PetscMemcpy(aij->saved_values,aij->a,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2779   PetscFunctionReturn(0);
2780 }
2781 
2782 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2783 {
2784   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2785   PetscErrorCode ierr;
2786   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2787 
2788   PetscFunctionBegin;
2789   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2790   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2791 
2792   /* copy values over */
2793   ierr = PetscMemcpy(aij->a,aij->saved_values,nz*sizeof(PetscScalar));CHKERRQ(ierr);
2794   PetscFunctionReturn(0);
2795 }
2796 
2797 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2798 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2799 
2800 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2801 {
2802   Mat_SeqBAIJ    *b;
2803   PetscErrorCode ierr;
2804   PetscInt       i,mbs,nbs,bs2;
2805   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2806 
2807   PetscFunctionBegin;
2808   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2809   if (nz == MAT_SKIP_ALLOCATION) {
2810     skipallocation = PETSC_TRUE;
2811     nz             = 0;
2812   }
2813 
2814   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2815   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2816   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2817   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2818 
2819   B->preallocated = PETSC_TRUE;
2820 
2821   mbs = B->rmap->n/bs;
2822   nbs = B->cmap->n/bs;
2823   bs2 = bs*bs;
2824 
2825   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2826 
2827   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2828   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2829   if (nnz) {
2830     for (i=0; i<mbs; i++) {
2831       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2832       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2833     }
2834   }
2835 
2836   b    = (Mat_SeqBAIJ*)B->data;
2837   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2838   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2839   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2840 
2841   if (!flg) {
2842     switch (bs) {
2843     case 1:
2844       B->ops->mult    = MatMult_SeqBAIJ_1;
2845       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2846       break;
2847     case 2:
2848       B->ops->mult    = MatMult_SeqBAIJ_2;
2849       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2850       break;
2851     case 3:
2852       B->ops->mult    = MatMult_SeqBAIJ_3;
2853       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2854       break;
2855     case 4:
2856       B->ops->mult    = MatMult_SeqBAIJ_4;
2857       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2858       break;
2859     case 5:
2860       B->ops->mult    = MatMult_SeqBAIJ_5;
2861       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2862       break;
2863     case 6:
2864       B->ops->mult    = MatMult_SeqBAIJ_6;
2865       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2866       break;
2867     case 7:
2868       B->ops->mult    = MatMult_SeqBAIJ_7;
2869       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2870       break;
2871     case 9:
2872 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2873       B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2874       B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2875 #else
2876       B->ops->mult    = MatMult_SeqBAIJ_N;
2877       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2878 #endif
2879       break;
2880     case 11:
2881       B->ops->mult    = MatMult_SeqBAIJ_11;
2882       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2883       break;
2884     case 15:
2885       B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2886       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2887       break;
2888     default:
2889       B->ops->mult    = MatMult_SeqBAIJ_N;
2890       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2891       break;
2892     }
2893   }
2894   B->ops->sor = MatSOR_SeqBAIJ;
2895   b->mbs = mbs;
2896   b->nbs = nbs;
2897   if (!skipallocation) {
2898     if (!b->imax) {
2899       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2900       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2901 
2902       b->free_imax_ilen = PETSC_TRUE;
2903     }
2904     /* b->ilen will count nonzeros in each block row so far. */
2905     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2906     if (!nnz) {
2907       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2908       else if (nz < 0) nz = 1;
2909       nz = PetscMin(nz,nbs);
2910       for (i=0; i<mbs; i++) b->imax[i] = nz;
2911       nz = nz*mbs;
2912     } else {
2913       nz = 0;
2914       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz += nnz[i];}
2915     }
2916 
2917     /* allocate the matrix space */
2918     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2919     if (B->structure_only) {
2920       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2921       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2922       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2923     } else {
2924       ierr = PetscMalloc3(bs2*nz,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2925       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2926       ierr = PetscMemzero(b->a,nz*bs2*sizeof(MatScalar));CHKERRQ(ierr);
2927     }
2928     ierr = PetscMemzero(b->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
2929 
2930     if (B->structure_only) {
2931       b->singlemalloc = PETSC_FALSE;
2932       b->free_a       = PETSC_FALSE;
2933     } else {
2934       b->singlemalloc = PETSC_TRUE;
2935       b->free_a       = PETSC_TRUE;
2936     }
2937     b->free_ij = PETSC_TRUE;
2938 
2939     b->i[0] = 0;
2940     for (i=1; i<mbs+1; i++) {
2941       b->i[i] = b->i[i-1] + b->imax[i-1];
2942     }
2943 
2944   } else {
2945     b->free_a  = PETSC_FALSE;
2946     b->free_ij = PETSC_FALSE;
2947   }
2948 
2949   b->bs2              = bs2;
2950   b->mbs              = mbs;
2951   b->nz               = 0;
2952   b->maxnz            = nz;
2953   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
2954   B->was_assembled    = PETSC_FALSE;
2955   B->assembled        = PETSC_FALSE;
2956   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
2957   PetscFunctionReturn(0);
2958 }
2959 
2960 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
2961 {
2962   PetscInt       i,m,nz,nz_max=0,*nnz;
2963   PetscScalar    *values=0;
2964   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
2965   PetscErrorCode ierr;
2966 
2967   PetscFunctionBegin;
2968   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
2969   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
2970   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
2971   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2972   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2973   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2974   m    = B->rmap->n/bs;
2975 
2976   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
2977   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
2978   for (i=0; i<m; i++) {
2979     nz = ii[i+1]- ii[i];
2980     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
2981     nz_max = PetscMax(nz_max, nz);
2982     nnz[i] = nz;
2983   }
2984   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2985   ierr = PetscFree(nnz);CHKERRQ(ierr);
2986 
2987   values = (PetscScalar*)V;
2988   if (!values) {
2989     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
2990   }
2991   for (i=0; i<m; i++) {
2992     PetscInt          ncols  = ii[i+1] - ii[i];
2993     const PetscInt    *icols = jj + ii[i];
2994     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
2995     if (!roworiented) {
2996       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
2997     } else {
2998       PetscInt j;
2999       for (j=0; j<ncols; j++) {
3000         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3001         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3002       }
3003     }
3004   }
3005   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3006   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3007   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3008   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3009   PetscFunctionReturn(0);
3010 }
3011 
3012 /*MC
3013    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3014    block sparse compressed row format.
3015 
3016    Options Database Keys:
3017 . -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3018 
3019   Level: beginner
3020 
3021 .seealso: MatCreateSeqBAIJ()
3022 M*/
3023 
3024 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3025 
3026 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3027 {
3028   PetscErrorCode ierr;
3029   PetscMPIInt    size;
3030   Mat_SeqBAIJ    *b;
3031 
3032   PetscFunctionBegin;
3033   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRQ(ierr);
3034   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3035 
3036   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3037   B->data = (void*)b;
3038   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3039 
3040   b->row          = 0;
3041   b->col          = 0;
3042   b->icol         = 0;
3043   b->reallocs     = 0;
3044   b->saved_values = 0;
3045 
3046   b->roworiented        = PETSC_TRUE;
3047   b->nonew              = 0;
3048   b->diag               = 0;
3049   B->spptr              = 0;
3050   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3051   b->keepnonzeropattern = PETSC_FALSE;
3052 
3053   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3054   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3055   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3056   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3057   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3058   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3059   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3060   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3061   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3062 #if defined(PETSC_HAVE_HYPRE)
3063   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3064 #endif
3065   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3066   ierr = PetscObjectComposeFunction((PetscObject)B,"MatPtAP_is_seqbaij_C",MatPtAP_IS_XAIJ);CHKERRQ(ierr);
3067   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3068   PetscFunctionReturn(0);
3069 }
3070 
3071 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3072 {
3073   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3074   PetscErrorCode ierr;
3075   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3076 
3077   PetscFunctionBegin;
3078   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3079 
3080   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3081     c->imax           = a->imax;
3082     c->ilen           = a->ilen;
3083     c->free_imax_ilen = PETSC_FALSE;
3084   } else {
3085     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3086     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3087     for (i=0; i<mbs; i++) {
3088       c->imax[i] = a->imax[i];
3089       c->ilen[i] = a->ilen[i];
3090     }
3091     c->free_imax_ilen = PETSC_TRUE;
3092   }
3093 
3094   /* allocate the matrix space */
3095   if (mallocmatspace) {
3096     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3097       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3098       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3099 
3100       c->i            = a->i;
3101       c->j            = a->j;
3102       c->singlemalloc = PETSC_FALSE;
3103       c->free_a       = PETSC_TRUE;
3104       c->free_ij      = PETSC_FALSE;
3105       c->parent       = A;
3106       C->preallocated = PETSC_TRUE;
3107       C->assembled    = PETSC_TRUE;
3108 
3109       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3110       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3111       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3112     } else {
3113       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3114       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3115 
3116       c->singlemalloc = PETSC_TRUE;
3117       c->free_a       = PETSC_TRUE;
3118       c->free_ij      = PETSC_TRUE;
3119 
3120       ierr = PetscMemcpy(c->i,a->i,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3121       if (mbs > 0) {
3122         ierr = PetscMemcpy(c->j,a->j,nz*sizeof(PetscInt));CHKERRQ(ierr);
3123         if (cpvalues == MAT_COPY_VALUES) {
3124           ierr = PetscMemcpy(c->a,a->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3125         } else {
3126           ierr = PetscMemzero(c->a,bs2*nz*sizeof(MatScalar));CHKERRQ(ierr);
3127         }
3128       }
3129       C->preallocated = PETSC_TRUE;
3130       C->assembled    = PETSC_TRUE;
3131     }
3132   }
3133 
3134   c->roworiented = a->roworiented;
3135   c->nonew       = a->nonew;
3136 
3137   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3138   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3139 
3140   c->bs2         = a->bs2;
3141   c->mbs         = a->mbs;
3142   c->nbs         = a->nbs;
3143 
3144   if (a->diag) {
3145     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3146       c->diag      = a->diag;
3147       c->free_diag = PETSC_FALSE;
3148     } else {
3149       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3150       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3151       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3152       c->free_diag = PETSC_TRUE;
3153     }
3154   } else c->diag = 0;
3155 
3156   c->nz         = a->nz;
3157   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3158   c->solve_work = NULL;
3159   c->mult_work  = NULL;
3160   c->sor_workt  = NULL;
3161   c->sor_work   = NULL;
3162 
3163   c->compressedrow.use   = a->compressedrow.use;
3164   c->compressedrow.nrows = a->compressedrow.nrows;
3165   if (a->compressedrow.use) {
3166     i    = a->compressedrow.nrows;
3167     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3168     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3169     ierr = PetscMemcpy(c->compressedrow.i,a->compressedrow.i,(i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3170     ierr = PetscMemcpy(c->compressedrow.rindex,a->compressedrow.rindex,i*sizeof(PetscInt));CHKERRQ(ierr);
3171   } else {
3172     c->compressedrow.use    = PETSC_FALSE;
3173     c->compressedrow.i      = NULL;
3174     c->compressedrow.rindex = NULL;
3175   }
3176   C->nonzerostate = A->nonzerostate;
3177 
3178   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3179   PetscFunctionReturn(0);
3180 }
3181 
3182 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3183 {
3184   PetscErrorCode ierr;
3185 
3186   PetscFunctionBegin;
3187   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3188   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3189   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3190   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3191   PetscFunctionReturn(0);
3192 }
3193 
3194 PetscErrorCode MatLoad_SeqBAIJ(Mat newmat,PetscViewer viewer)
3195 {
3196   Mat_SeqBAIJ    *a;
3197   PetscErrorCode ierr;
3198   PetscInt       i,nz,header[4],*rowlengths=0,M,N,bs = newmat->rmap->bs;
3199   PetscInt       *mask,mbs,*jj,j,rowcount,nzcount,k,*browlengths,maskcount;
3200   PetscInt       kmax,jcount,block,idx,point,nzcountb,extra_rows,rows,cols;
3201   PetscInt       *masked,nmask,tmp,bs2,ishift;
3202   PetscMPIInt    size;
3203   int            fd;
3204   PetscScalar    *aa;
3205   MPI_Comm       comm;
3206   PetscBool      isbinary;
3207 
3208   PetscFunctionBegin;
3209   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3210   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)newmat),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)newmat)->type_name);
3211 
3212   /* force binary viewer to load .info file if it has not yet done so */
3213   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3214   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
3215   ierr = PetscOptionsBegin(comm,NULL,"Options for loading SEQBAIJ matrix","Mat");CHKERRQ(ierr);
3216   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
3217   ierr = PetscOptionsEnd();CHKERRQ(ierr);
3218   if (bs < 0) bs = 1;
3219   bs2  = bs*bs;
3220 
3221   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3222   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"view must have one processor");
3223   ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
3224   ierr = PetscBinaryRead(fd,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3225   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not Mat object");
3226   M = header[1]; N = header[2]; nz = header[3];
3227 
3228   if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as SeqBAIJ");
3229   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
3230 
3231   /*
3232      This code adds extra rows to make sure the number of rows is
3233     divisible by the blocksize
3234   */
3235   mbs        = M/bs;
3236   extra_rows = bs - M + bs*(mbs);
3237   if (extra_rows == bs) extra_rows = 0;
3238   else mbs++;
3239   if (extra_rows) {
3240     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
3241   }
3242 
3243   /* Set global sizes if not already set */
3244   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) {
3245     ierr = MatSetSizes(newmat,PETSC_DECIDE,PETSC_DECIDE,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
3246   } else { /* Check if the matrix global sizes are correct */
3247     ierr = MatGetSize(newmat,&rows,&cols);CHKERRQ(ierr);
3248     if (rows < 0 && cols < 0) { /* user might provide local size instead of global size */
3249       ierr = MatGetLocalSize(newmat,&rows,&cols);CHKERRQ(ierr);
3250     }
3251     if (M != rows ||  N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix in file of different length (%d, %d) than the input matrix (%d, %d)",M,N,rows,cols);
3252   }
3253 
3254   /* read in row lengths */
3255   ierr = PetscMalloc1(M+extra_rows,&rowlengths);CHKERRQ(ierr);
3256   ierr = PetscBinaryRead(fd,rowlengths,M,NULL,PETSC_INT);CHKERRQ(ierr);
3257   for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
3258 
3259   /* read in column indices */
3260   ierr = PetscMalloc1(nz+extra_rows,&jj);CHKERRQ(ierr);
3261   ierr = PetscBinaryRead(fd,jj,nz,NULL,PETSC_INT);CHKERRQ(ierr);
3262   for (i=0; i<extra_rows; i++) jj[nz+i] = M+i;
3263 
3264   /* loop over row lengths determining block row lengths */
3265   ierr     = PetscCalloc1(mbs,&browlengths);CHKERRQ(ierr);
3266   ierr     = PetscMalloc2(mbs,&mask,mbs,&masked);CHKERRQ(ierr);
3267   ierr     = PetscMemzero(mask,mbs*sizeof(PetscInt));CHKERRQ(ierr);
3268   rowcount = 0;
3269   nzcount  = 0;
3270   for (i=0; i<mbs; i++) {
3271     nmask = 0;
3272     for (j=0; j<bs; j++) {
3273       kmax = rowlengths[rowcount];
3274       for (k=0; k<kmax; k++) {
3275         tmp = jj[nzcount++]/bs;
3276         if (!mask[tmp]) {masked[nmask++] = tmp; mask[tmp] = 1;}
3277       }
3278       rowcount++;
3279     }
3280     browlengths[i] += nmask;
3281     /* zero out the mask elements we set */
3282     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3283   }
3284 
3285   /* Do preallocation  */
3286   ierr = MatSeqBAIJSetPreallocation(newmat,bs,0,browlengths);CHKERRQ(ierr);
3287   a    = (Mat_SeqBAIJ*)newmat->data;
3288 
3289   /* set matrix "i" values */
3290   a->i[0] = 0;
3291   for (i=1; i<= mbs; i++) {
3292     a->i[i]      = a->i[i-1] + browlengths[i-1];
3293     a->ilen[i-1] = browlengths[i-1];
3294   }
3295   a->nz = 0;
3296   for (i=0; i<mbs; i++) a->nz += browlengths[i];
3297 
3298   /* read in nonzero values */
3299   ierr = PetscMalloc1(nz+extra_rows,&aa);CHKERRQ(ierr);
3300   ierr = PetscBinaryRead(fd,aa,nz,NULL,PETSC_SCALAR);CHKERRQ(ierr);
3301   for (i=0; i<extra_rows; i++) aa[nz+i] = 1.0;
3302 
3303   /* set "a" and "j" values into matrix */
3304   nzcount = 0; jcount = 0;
3305   for (i=0; i<mbs; i++) {
3306     nzcountb = nzcount;
3307     nmask    = 0;
3308     for (j=0; j<bs; j++) {
3309       kmax = rowlengths[i*bs+j];
3310       for (k=0; k<kmax; k++) {
3311         tmp = jj[nzcount++]/bs;
3312         if (!mask[tmp]) { masked[nmask++] = tmp; mask[tmp] = 1;}
3313       }
3314     }
3315     /* sort the masked values */
3316     ierr = PetscSortInt(nmask,masked);CHKERRQ(ierr);
3317 
3318     /* set "j" values into matrix */
3319     maskcount = 1;
3320     for (j=0; j<nmask; j++) {
3321       a->j[jcount++]  = masked[j];
3322       mask[masked[j]] = maskcount++;
3323     }
3324     /* set "a" values into matrix */
3325     ishift = bs2*a->i[i];
3326     for (j=0; j<bs; j++) {
3327       kmax = rowlengths[i*bs+j];
3328       for (k=0; k<kmax; k++) {
3329         tmp       = jj[nzcountb]/bs;
3330         block     = mask[tmp] - 1;
3331         point     = jj[nzcountb] - bs*tmp;
3332         idx       = ishift + bs2*block + j + bs*point;
3333         a->a[idx] = (MatScalar)aa[nzcountb++];
3334       }
3335     }
3336     /* zero out the mask elements we set */
3337     for (j=0; j<nmask; j++) mask[masked[j]] = 0;
3338   }
3339   if (jcount != a->nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Bad binary matrix");
3340 
3341   ierr = PetscFree(rowlengths);CHKERRQ(ierr);
3342   ierr = PetscFree(browlengths);CHKERRQ(ierr);
3343   ierr = PetscFree(aa);CHKERRQ(ierr);
3344   ierr = PetscFree(jj);CHKERRQ(ierr);
3345   ierr = PetscFree2(mask,masked);CHKERRQ(ierr);
3346 
3347   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3348   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3349   PetscFunctionReturn(0);
3350 }
3351 
3352 /*@C
3353    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3354    compressed row) format.  For good matrix assembly performance the
3355    user should preallocate the matrix storage by setting the parameter nz
3356    (or the array nnz).  By setting these parameters accurately, performance
3357    during matrix assembly can be increased by more than a factor of 50.
3358 
3359    Collective on MPI_Comm
3360 
3361    Input Parameters:
3362 +  comm - MPI communicator, set to PETSC_COMM_SELF
3363 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3364           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3365 .  m - number of rows
3366 .  n - number of columns
3367 .  nz - number of nonzero blocks  per block row (same for all rows)
3368 -  nnz - array containing the number of nonzero blocks in the various block rows
3369          (possibly different for each block row) or NULL
3370 
3371    Output Parameter:
3372 .  A - the matrix
3373 
3374    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3375    MatXXXXSetPreallocation() paradgm instead of this routine directly.
3376    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3377 
3378    Options Database Keys:
3379 .   -mat_no_unroll - uses code that does not unroll the loops in the
3380                      block calculations (much slower)
3381 .    -mat_block_size - size of the blocks to use
3382 
3383    Level: intermediate
3384 
3385    Notes:
3386    The number of rows and columns must be divisible by blocksize.
3387 
3388    If the nnz parameter is given then the nz parameter is ignored
3389 
3390    A nonzero block is any block that as 1 or more nonzeros in it
3391 
3392    The block AIJ format is fully compatible with standard Fortran 77
3393    storage.  That is, the stored row and column indices can begin at
3394    either one (as in Fortran) or zero.  See the users' manual for details.
3395 
3396    Specify the preallocated storage with either nz or nnz (not both).
3397    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3398    allocation.  See Users-Manual: ch_mat for details.
3399    matrices.
3400 
3401 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3402 @*/
3403 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3404 {
3405   PetscErrorCode ierr;
3406 
3407   PetscFunctionBegin;
3408   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3409   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3410   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3411   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3412   PetscFunctionReturn(0);
3413 }
3414 
3415 /*@C
3416    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3417    per row in the matrix. For good matrix assembly performance the
3418    user should preallocate the matrix storage by setting the parameter nz
3419    (or the array nnz).  By setting these parameters accurately, performance
3420    during matrix assembly can be increased by more than a factor of 50.
3421 
3422    Collective on MPI_Comm
3423 
3424    Input Parameters:
3425 +  B - the matrix
3426 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3427           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3428 .  nz - number of block nonzeros per block row (same for all rows)
3429 -  nnz - array containing the number of block nonzeros in the various block rows
3430          (possibly different for each block row) or NULL
3431 
3432    Options Database Keys:
3433 .   -mat_no_unroll - uses code that does not unroll the loops in the
3434                      block calculations (much slower)
3435 .   -mat_block_size - size of the blocks to use
3436 
3437    Level: intermediate
3438 
3439    Notes:
3440    If the nnz parameter is given then the nz parameter is ignored
3441 
3442    You can call MatGetInfo() to get information on how effective the preallocation was;
3443    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3444    You can also run with the option -info and look for messages with the string
3445    malloc in them to see if additional memory allocation was needed.
3446 
3447    The block AIJ format is fully compatible with standard Fortran 77
3448    storage.  That is, the stored row and column indices can begin at
3449    either one (as in Fortran) or zero.  See the users' manual for details.
3450 
3451    Specify the preallocated storage with either nz or nnz (not both).
3452    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3453    allocation.  See Users-Manual: ch_mat for details.
3454 
3455 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3456 @*/
3457 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3458 {
3459   PetscErrorCode ierr;
3460 
3461   PetscFunctionBegin;
3462   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3463   PetscValidType(B,1);
3464   PetscValidLogicalCollectiveInt(B,bs,2);
3465   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3466   PetscFunctionReturn(0);
3467 }
3468 
3469 /*@C
3470    MatSeqBAIJSetPreallocationCSR - Allocates memory for a sparse sequential matrix in AIJ format
3471    (the default sequential PETSc format).
3472 
3473    Collective on MPI_Comm
3474 
3475    Input Parameters:
3476 +  B - the matrix
3477 .  i - the indices into j for the start of each local row (starts with zero)
3478 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3479 -  v - optional values in the matrix
3480 
3481    Level: developer
3482 
3483    Notes:
3484    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3485    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3486    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3487    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3488    block column and the second index is over columns within a block.
3489 
3490 .keywords: matrix, aij, compressed row, sparse
3491 
3492 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3493 @*/
3494 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3495 {
3496   PetscErrorCode ierr;
3497 
3498   PetscFunctionBegin;
3499   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3500   PetscValidType(B,1);
3501   PetscValidLogicalCollectiveInt(B,bs,2);
3502   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3503   PetscFunctionReturn(0);
3504 }
3505 
3506 
3507 /*@
3508      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3509 
3510      Collective on MPI_Comm
3511 
3512    Input Parameters:
3513 +  comm - must be an MPI communicator of size 1
3514 .  bs - size of block
3515 .  m - number of rows
3516 .  n - number of columns
3517 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3518 .  j - column indices
3519 -  a - matrix values
3520 
3521    Output Parameter:
3522 .  mat - the matrix
3523 
3524    Level: advanced
3525 
3526    Notes:
3527        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3528     once the matrix is destroyed
3529 
3530        You cannot set new nonzero locations into this matrix, that will generate an error.
3531 
3532        The i and j indices are 0 based
3533 
3534        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3535 
3536       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3537       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3538       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3539       with column-major ordering within blocks.
3540 
3541 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3542 
3543 @*/
3544 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3545 {
3546   PetscErrorCode ierr;
3547   PetscInt       ii;
3548   Mat_SeqBAIJ    *baij;
3549 
3550   PetscFunctionBegin;
3551   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3552   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3553 
3554   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3555   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3556   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3557   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,0);CHKERRQ(ierr);
3558   baij = (Mat_SeqBAIJ*)(*mat)->data;
3559   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3560   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3561 
3562   baij->i = i;
3563   baij->j = j;
3564   baij->a = a;
3565 
3566   baij->singlemalloc = PETSC_FALSE;
3567   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3568   baij->free_a       = PETSC_FALSE;
3569   baij->free_ij      = PETSC_FALSE;
3570 
3571   for (ii=0; ii<m; ii++) {
3572     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3573 #if defined(PETSC_USE_DEBUG)
3574     if (i[ii+1] - i[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3575 #endif
3576   }
3577 #if defined(PETSC_USE_DEBUG)
3578   for (ii=0; ii<baij->i[m]; ii++) {
3579     if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3580     if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3581   }
3582 #endif
3583 
3584   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3585   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3586   PetscFunctionReturn(0);
3587 }
3588 
3589 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3590 {
3591   PetscErrorCode ierr;
3592   PetscMPIInt    size;
3593 
3594   PetscFunctionBegin;
3595   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
3596   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3597     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3598   } else {
3599     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3600   }
3601   PetscFunctionReturn(0);
3602 }
3603