xref: /petsc/src/mat/impls/baij/seq/baij.c (revision dfd57a172ac9fa6c7b5fe6de6ab5df85cefc2996)
1 
2 /*
3     Defines the basic matrix operations for the BAIJ (compressed row)
4   matrix storage format.
5 */
6 #include <../src/mat/impls/baij/seq/baij.h>  /*I   "petscmat.h"  I*/
7 #include <petscblaslapack.h>
8 #include <petsc/private/kernels/blockinvert.h>
9 #include <petsc/private/kernels/blockmatmult.h>
10 
11 #if defined(PETSC_HAVE_HYPRE)
12 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat,MatType,MatReuse,Mat*);
13 #endif
14 
15 #if defined(PETSC_HAVE_MKL_SPARSE_OPTIMIZE)
16 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBAIJMKL(Mat,MatType,MatReuse,Mat*);
17 #endif
18 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat,MatType,MatReuse,Mat*);
19 
20 PetscErrorCode MatInvertBlockDiagonal_SeqBAIJ(Mat A,const PetscScalar **values)
21 {
22   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*) A->data;
23   PetscErrorCode ierr;
24   PetscInt       *diag_offset,i,bs = A->rmap->bs,mbs = a->mbs,ipvt[5],bs2 = bs*bs,*v_pivots;
25   MatScalar      *v    = a->a,*odiag,*diag,work[25],*v_work;
26   PetscReal      shift = 0.0;
27   PetscBool      allowzeropivot,zeropivotdetected=PETSC_FALSE;
28 
29   PetscFunctionBegin;
30   allowzeropivot = PetscNot(A->erroriffailure);
31 
32   if (a->idiagvalid) {
33     if (values) *values = a->idiag;
34     PetscFunctionReturn(0);
35   }
36   ierr        = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
37   diag_offset = a->diag;
38   if (!a->idiag) {
39     ierr = PetscMalloc1(bs2*mbs,&a->idiag);CHKERRQ(ierr);
40     ierr = PetscLogObjectMemory((PetscObject)A,bs2*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
41   }
42   diag  = a->idiag;
43   if (values) *values = a->idiag;
44   /* factor and invert each block */
45   switch (bs) {
46   case 1:
47     for (i=0; i<mbs; i++) {
48       odiag    = v + 1*diag_offset[i];
49       diag[0]  = odiag[0];
50 
51       if (PetscAbsScalar(diag[0] + shift) < PETSC_MACHINE_EPSILON) {
52         if (allowzeropivot) {
53           A->factorerrortype             = MAT_FACTOR_NUMERIC_ZEROPIVOT;
54           A->factorerror_zeropivot_value = PetscAbsScalar(diag[0]);
55           A->factorerror_zeropivot_row   = i;
56           ierr = PetscInfo1(A,"Zero pivot, row %D\n",i);CHKERRQ(ierr);
57         } else SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Zero pivot, row %D pivot value %g tolerance %g",i,(double)PetscAbsScalar(diag[0]),(double)PETSC_MACHINE_EPSILON);
58       }
59 
60       diag[0]  = (PetscScalar)1.0 / (diag[0] + shift);
61       diag    += 1;
62     }
63     break;
64   case 2:
65     for (i=0; i<mbs; i++) {
66       odiag    = v + 4*diag_offset[i];
67       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
68       ierr     = PetscKernel_A_gets_inverse_A_2(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
69       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
70       diag    += 4;
71     }
72     break;
73   case 3:
74     for (i=0; i<mbs; i++) {
75       odiag    = v + 9*diag_offset[i];
76       diag[0]  = odiag[0]; diag[1] = odiag[1]; diag[2] = odiag[2]; diag[3] = odiag[3];
77       diag[4]  = odiag[4]; diag[5] = odiag[5]; diag[6] = odiag[6]; diag[7] = odiag[7];
78       diag[8]  = odiag[8];
79       ierr     = PetscKernel_A_gets_inverse_A_3(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
80       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
81       diag    += 9;
82     }
83     break;
84   case 4:
85     for (i=0; i<mbs; i++) {
86       odiag  = v + 16*diag_offset[i];
87       ierr   = PetscArraycpy(diag,odiag,16);CHKERRQ(ierr);
88       ierr   = PetscKernel_A_gets_inverse_A_4(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
89       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
90       diag  += 16;
91     }
92     break;
93   case 5:
94     for (i=0; i<mbs; i++) {
95       odiag  = v + 25*diag_offset[i];
96       ierr   = PetscArraycpy(diag,odiag,25);CHKERRQ(ierr);
97       ierr   = PetscKernel_A_gets_inverse_A_5(diag,ipvt,work,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
98       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
99       diag  += 25;
100     }
101     break;
102   case 6:
103     for (i=0; i<mbs; i++) {
104       odiag  = v + 36*diag_offset[i];
105       ierr   = PetscArraycpy(diag,odiag,36);CHKERRQ(ierr);
106       ierr   = PetscKernel_A_gets_inverse_A_6(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
107       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
108       diag  += 36;
109     }
110     break;
111   case 7:
112     for (i=0; i<mbs; i++) {
113       odiag  = v + 49*diag_offset[i];
114       ierr   = PetscArraycpy(diag,odiag,49);CHKERRQ(ierr);
115       ierr   = PetscKernel_A_gets_inverse_A_7(diag,shift,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
116       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
117       diag  += 49;
118     }
119     break;
120   default:
121     ierr = PetscMalloc2(bs,&v_work,bs,&v_pivots);CHKERRQ(ierr);
122     for (i=0; i<mbs; i++) {
123       odiag  = v + bs2*diag_offset[i];
124       ierr   = PetscArraycpy(diag,odiag,bs2);CHKERRQ(ierr);
125       ierr   = PetscKernel_A_gets_inverse_A(bs,diag,v_pivots,v_work,allowzeropivot,&zeropivotdetected);CHKERRQ(ierr);
126       if (zeropivotdetected) A->factorerrortype = MAT_FACTOR_NUMERIC_ZEROPIVOT;
127       diag  += bs2;
128     }
129     ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
130   }
131   a->idiagvalid = PETSC_TRUE;
132   PetscFunctionReturn(0);
133 }
134 
135 PetscErrorCode MatSOR_SeqBAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
136 {
137   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
138   PetscScalar       *x,*work,*w,*workt,*t;
139   const MatScalar   *v,*aa = a->a, *idiag;
140   const PetscScalar *b,*xb;
141   PetscScalar       s[7], xw[7]={0}; /* avoid some compilers thinking xw is uninitialized */
142   PetscErrorCode    ierr;
143   PetscInt          m = a->mbs,i,i2,nz,bs = A->rmap->bs,bs2 = bs*bs,k,j,idx,it;
144   const PetscInt    *diag,*ai = a->i,*aj = a->j,*vi;
145 
146   PetscFunctionBegin;
147   its = its*lits;
148   if (flag & SOR_EISENSTAT) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
149   if (its <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
150   if (fshift) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
151   if (omega != 1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for non-trivial relaxation factor");
152   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
153 
154   if (!a->idiagvalid) {ierr = MatInvertBlockDiagonal(A,NULL);CHKERRQ(ierr);}
155 
156   if (!m) PetscFunctionReturn(0);
157   diag  = a->diag;
158   idiag = a->idiag;
159   k    = PetscMax(A->rmap->n,A->cmap->n);
160   if (!a->mult_work) {
161     ierr = PetscMalloc1(k+1,&a->mult_work);CHKERRQ(ierr);
162   }
163   if (!a->sor_workt) {
164     ierr = PetscMalloc1(k,&a->sor_workt);CHKERRQ(ierr);
165   }
166   if (!a->sor_work) {
167     ierr = PetscMalloc1(bs,&a->sor_work);CHKERRQ(ierr);
168   }
169   work = a->mult_work;
170   t    = a->sor_workt;
171   w    = a->sor_work;
172 
173   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
174   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
175 
176   if (flag & SOR_ZERO_INITIAL_GUESS) {
177     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
178       switch (bs) {
179       case 1:
180         PetscKernel_v_gets_A_times_w_1(x,idiag,b);
181         t[0] = b[0];
182         i2     = 1;
183         idiag += 1;
184         for (i=1; i<m; i++) {
185           v  = aa + ai[i];
186           vi = aj + ai[i];
187           nz = diag[i] - ai[i];
188           s[0] = b[i2];
189           for (j=0; j<nz; j++) {
190             xw[0] = x[vi[j]];
191             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
192           }
193           t[i2] = s[0];
194           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
195           x[i2]  = xw[0];
196           idiag += 1;
197           i2    += 1;
198         }
199         break;
200       case 2:
201         PetscKernel_v_gets_A_times_w_2(x,idiag,b);
202         t[0] = b[0]; t[1] = b[1];
203         i2     = 2;
204         idiag += 4;
205         for (i=1; i<m; i++) {
206           v  = aa + 4*ai[i];
207           vi = aj + ai[i];
208           nz = diag[i] - ai[i];
209           s[0] = b[i2]; s[1] = b[i2+1];
210           for (j=0; j<nz; j++) {
211             idx = 2*vi[j];
212             it  = 4*j;
213             xw[0] = x[idx]; xw[1] = x[1+idx];
214             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
215           }
216           t[i2] = s[0]; t[i2+1] = s[1];
217           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
218           x[i2]   = xw[0]; x[i2+1] = xw[1];
219           idiag  += 4;
220           i2     += 2;
221         }
222         break;
223       case 3:
224         PetscKernel_v_gets_A_times_w_3(x,idiag,b);
225         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
226         i2     = 3;
227         idiag += 9;
228         for (i=1; i<m; i++) {
229           v  = aa + 9*ai[i];
230           vi = aj + ai[i];
231           nz = diag[i] - ai[i];
232           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
233           while (nz--) {
234             idx = 3*(*vi++);
235             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
236             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
237             v  += 9;
238           }
239           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
240           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
241           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
242           idiag  += 9;
243           i2     += 3;
244         }
245         break;
246       case 4:
247         PetscKernel_v_gets_A_times_w_4(x,idiag,b);
248         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3];
249         i2     = 4;
250         idiag += 16;
251         for (i=1; i<m; i++) {
252           v  = aa + 16*ai[i];
253           vi = aj + ai[i];
254           nz = diag[i] - ai[i];
255           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
256           while (nz--) {
257             idx = 4*(*vi++);
258             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
259             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
260             v  += 16;
261           }
262           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2 + 3] = s[3];
263           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
264           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
265           idiag  += 16;
266           i2     += 4;
267         }
268         break;
269       case 5:
270         PetscKernel_v_gets_A_times_w_5(x,idiag,b);
271         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4];
272         i2     = 5;
273         idiag += 25;
274         for (i=1; i<m; i++) {
275           v  = aa + 25*ai[i];
276           vi = aj + ai[i];
277           nz = diag[i] - ai[i];
278           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
279           while (nz--) {
280             idx = 5*(*vi++);
281             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
282             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
283             v  += 25;
284           }
285           t[i2] = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2]; t[i2+3] = s[3]; t[i2+4] = s[4];
286           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
287           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
288           idiag  += 25;
289           i2     += 5;
290         }
291         break;
292       case 6:
293         PetscKernel_v_gets_A_times_w_6(x,idiag,b);
294         t[0] = b[0]; t[1] = b[1]; t[2] = b[2]; t[3] = b[3]; t[4] = b[4]; t[5] = b[5];
295         i2     = 6;
296         idiag += 36;
297         for (i=1; i<m; i++) {
298           v  = aa + 36*ai[i];
299           vi = aj + ai[i];
300           nz = diag[i] - ai[i];
301           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
302           while (nz--) {
303             idx = 6*(*vi++);
304             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
305             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
306             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
307             v  += 36;
308           }
309           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
310           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5];
311           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
312           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
313           idiag  += 36;
314           i2     += 6;
315         }
316         break;
317       case 7:
318         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
319         t[0] = b[0]; t[1] = b[1]; t[2] = b[2];
320         t[3] = b[3]; t[4] = b[4]; t[5] = b[5]; t[6] = b[6];
321         i2     = 7;
322         idiag += 49;
323         for (i=1; i<m; i++) {
324           v  = aa + 49*ai[i];
325           vi = aj + ai[i];
326           nz = diag[i] - ai[i];
327           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
328           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
329           while (nz--) {
330             idx = 7*(*vi++);
331             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
332             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
333             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
334             v  += 49;
335           }
336           t[i2]   = s[0]; t[i2+1] = s[1]; t[i2+2] = s[2];
337           t[i2+3] = s[3]; t[i2+4] = s[4]; t[i2+5] = s[5]; t[i2+6] = s[6];
338           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
339           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
340           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
341           idiag  += 49;
342           i2     += 7;
343         }
344         break;
345       default:
346         PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);
347         ierr = PetscArraycpy(t,b,bs);CHKERRQ(ierr);
348         i2     = bs;
349         idiag += bs2;
350         for (i=1; i<m; i++) {
351           v  = aa + bs2*ai[i];
352           vi = aj + ai[i];
353           nz = diag[i] - ai[i];
354 
355           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
356           /* copy all rows of x that are needed into contiguous space */
357           workt = work;
358           for (j=0; j<nz; j++) {
359             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
360             workt += bs;
361           }
362           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
363           ierr = PetscArraycpy(t+i2,w,bs);CHKERRQ(ierr);
364           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
365 
366           idiag += bs2;
367           i2    += bs;
368         }
369         break;
370       }
371       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
372       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
373       xb = t;
374     }
375     else xb = b;
376     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
377       idiag = a->idiag+bs2*(a->mbs-1);
378       i2 = bs * (m-1);
379       switch (bs) {
380       case 1:
381         s[0]  = xb[i2];
382         PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
383         x[i2] = xw[0];
384         i2   -= 1;
385         for (i=m-2; i>=0; i--) {
386           v  = aa + (diag[i]+1);
387           vi = aj + diag[i] + 1;
388           nz = ai[i+1] - diag[i] - 1;
389           s[0] = xb[i2];
390           for (j=0; j<nz; j++) {
391             xw[0] = x[vi[j]];
392             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
393           }
394           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
395           x[i2]  = xw[0];
396           idiag -= 1;
397           i2    -= 1;
398         }
399         break;
400       case 2:
401         s[0]  = xb[i2]; s[1] = xb[i2+1];
402         PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
403         x[i2] = xw[0]; x[i2+1] = xw[1];
404         i2    -= 2;
405         idiag -= 4;
406         for (i=m-2; i>=0; i--) {
407           v  = aa + 4*(diag[i] + 1);
408           vi = aj + diag[i] + 1;
409           nz = ai[i+1] - diag[i] - 1;
410           s[0] = xb[i2]; s[1] = xb[i2+1];
411           for (j=0; j<nz; j++) {
412             idx = 2*vi[j];
413             it  = 4*j;
414             xw[0] = x[idx]; xw[1] = x[1+idx];
415             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
416           }
417           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
418           x[i2]   = xw[0]; x[i2+1] = xw[1];
419           idiag  -= 4;
420           i2     -= 2;
421         }
422         break;
423       case 3:
424         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
425         PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
426         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
427         i2    -= 3;
428         idiag -= 9;
429         for (i=m-2; i>=0; i--) {
430           v  = aa + 9*(diag[i]+1);
431           vi = aj + diag[i] + 1;
432           nz = ai[i+1] - diag[i] - 1;
433           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2];
434           while (nz--) {
435             idx = 3*(*vi++);
436             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
437             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
438             v  += 9;
439           }
440           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
441           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
442           idiag  -= 9;
443           i2     -= 3;
444         }
445         break;
446       case 4:
447         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
448         PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
449         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
450         i2    -= 4;
451         idiag -= 16;
452         for (i=m-2; i>=0; i--) {
453           v  = aa + 16*(diag[i]+1);
454           vi = aj + diag[i] + 1;
455           nz = ai[i+1] - diag[i] - 1;
456           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3];
457           while (nz--) {
458             idx = 4*(*vi++);
459             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
460             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
461             v  += 16;
462           }
463           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
464           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3];
465           idiag  -= 16;
466           i2     -= 4;
467         }
468         break;
469       case 5:
470         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
471         PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
472         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
473         i2    -= 5;
474         idiag -= 25;
475         for (i=m-2; i>=0; i--) {
476           v  = aa + 25*(diag[i]+1);
477           vi = aj + diag[i] + 1;
478           nz = ai[i+1] - diag[i] - 1;
479           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4];
480           while (nz--) {
481             idx = 5*(*vi++);
482             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
483             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
484             v  += 25;
485           }
486           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
487           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4];
488           idiag  -= 25;
489           i2     -= 5;
490         }
491         break;
492       case 6:
493         s[0]  = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
494         PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
495         x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
496         i2    -= 6;
497         idiag -= 36;
498         for (i=m-2; i>=0; i--) {
499           v  = aa + 36*(diag[i]+1);
500           vi = aj + diag[i] + 1;
501           nz = ai[i+1] - diag[i] - 1;
502           s[0] = xb[i2]; s[1] = xb[i2+1]; s[2] = xb[i2+2]; s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5];
503           while (nz--) {
504             idx = 6*(*vi++);
505             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
506             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
507             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
508             v  += 36;
509           }
510           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
511           x[i2] = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2]; x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5];
512           idiag  -= 36;
513           i2     -= 6;
514         }
515         break;
516       case 7:
517         s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
518         s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
519         PetscKernel_v_gets_A_times_w_7(x,idiag,b);
520         x[i2]   = xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
521         x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
522         i2    -= 7;
523         idiag -= 49;
524         for (i=m-2; i>=0; i--) {
525           v  = aa + 49*(diag[i]+1);
526           vi = aj + diag[i] + 1;
527           nz = ai[i+1] - diag[i] - 1;
528           s[0] = xb[i2];   s[1] = xb[i2+1]; s[2] = xb[i2+2];
529           s[3] = xb[i2+3]; s[4] = xb[i2+4]; s[5] = xb[i2+5]; s[6] = xb[i2+6];
530           while (nz--) {
531             idx = 7*(*vi++);
532             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
533             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
534             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
535             v  += 49;
536           }
537           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
538           x[i2] =   xw[0]; x[i2+1] = xw[1]; x[i2+2] = xw[2];
539           x[i2+3] = xw[3]; x[i2+4] = xw[4]; x[i2+5] = xw[5]; x[i2+6] = xw[6];
540           idiag  -= 49;
541           i2     -= 7;
542         }
543         break;
544       default:
545         ierr  = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
546         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
547         i2    -= bs;
548         idiag -= bs2;
549         for (i=m-2; i>=0; i--) {
550           v  = aa + bs2*(diag[i]+1);
551           vi = aj + diag[i] + 1;
552           nz = ai[i+1] - diag[i] - 1;
553 
554           ierr = PetscArraycpy(w,xb+i2,bs);CHKERRQ(ierr);
555           /* copy all rows of x that are needed into contiguous space */
556           workt = work;
557           for (j=0; j<nz; j++) {
558             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
559             workt += bs;
560           }
561           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
562           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
563 
564           idiag -= bs2;
565           i2    -= bs;
566         }
567         break;
568       }
569       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
570     }
571     its--;
572   }
573   while (its--) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       idiag = a->idiag;
576       i2 = 0;
577       switch (bs) {
578       case 1:
579         for (i=0; i<m; i++) {
580           v  = aa + ai[i];
581           vi = aj + ai[i];
582           nz = ai[i+1] - ai[i];
583           s[0] = b[i2];
584           for (j=0; j<nz; j++) {
585             xw[0] = x[vi[j]];
586             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
587           }
588           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
589           x[i2] += xw[0];
590           idiag += 1;
591           i2    += 1;
592         }
593         break;
594       case 2:
595         for (i=0; i<m; i++) {
596           v  = aa + 4*ai[i];
597           vi = aj + ai[i];
598           nz = ai[i+1] - ai[i];
599           s[0] = b[i2]; s[1] = b[i2+1];
600           for (j=0; j<nz; j++) {
601             idx = 2*vi[j];
602             it  = 4*j;
603             xw[0] = x[idx]; xw[1] = x[1+idx];
604             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
605           }
606           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
607           x[i2]  += xw[0]; x[i2+1] += xw[1];
608           idiag  += 4;
609           i2     += 2;
610         }
611         break;
612       case 3:
613         for (i=0; i<m; i++) {
614           v  = aa + 9*ai[i];
615           vi = aj + ai[i];
616           nz = ai[i+1] - ai[i];
617           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
618           while (nz--) {
619             idx = 3*(*vi++);
620             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
621             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
622             v  += 9;
623           }
624           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
625           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
626           idiag  += 9;
627           i2     += 3;
628         }
629         break;
630       case 4:
631         for (i=0; i<m; i++) {
632           v  = aa + 16*ai[i];
633           vi = aj + ai[i];
634           nz = ai[i+1] - ai[i];
635           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
636           while (nz--) {
637             idx = 4*(*vi++);
638             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
639             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
640             v  += 16;
641           }
642           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
643           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
644           idiag  += 16;
645           i2     += 4;
646         }
647         break;
648       case 5:
649         for (i=0; i<m; i++) {
650           v  = aa + 25*ai[i];
651           vi = aj + ai[i];
652           nz = ai[i+1] - ai[i];
653           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
654           while (nz--) {
655             idx = 5*(*vi++);
656             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
657             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
658             v  += 25;
659           }
660           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
661           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
662           idiag  += 25;
663           i2     += 5;
664         }
665         break;
666       case 6:
667         for (i=0; i<m; i++) {
668           v  = aa + 36*ai[i];
669           vi = aj + ai[i];
670           nz = ai[i+1] - ai[i];
671           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
672           while (nz--) {
673             idx = 6*(*vi++);
674             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
675             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
676             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
677             v  += 36;
678           }
679           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
680           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
681           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
682           idiag  += 36;
683           i2     += 6;
684         }
685         break;
686       case 7:
687         for (i=0; i<m; i++) {
688           v  = aa + 49*ai[i];
689           vi = aj + ai[i];
690           nz = ai[i+1] - ai[i];
691           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
692           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
693           while (nz--) {
694             idx = 7*(*vi++);
695             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
696             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
697             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
698             v  += 49;
699           }
700           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
701           x[i2]   += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
702           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
703           idiag  += 49;
704           i2     += 7;
705         }
706         break;
707       default:
708         for (i=0; i<m; i++) {
709           v  = aa + bs2*ai[i];
710           vi = aj + ai[i];
711           nz = ai[i+1] - ai[i];
712 
713           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
714           /* copy all rows of x that are needed into contiguous space */
715           workt = work;
716           for (j=0; j<nz; j++) {
717             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
718             workt += bs;
719           }
720           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
721           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
722 
723           idiag += bs2;
724           i2    += bs;
725         }
726         break;
727       }
728       ierr = PetscLogFlops(2.0*bs2*a->nz);CHKERRQ(ierr);
729     }
730     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
731       idiag = a->idiag+bs2*(a->mbs-1);
732       i2 = bs * (m-1);
733       switch (bs) {
734       case 1:
735         for (i=m-1; i>=0; i--) {
736           v  = aa + ai[i];
737           vi = aj + ai[i];
738           nz = ai[i+1] - ai[i];
739           s[0] = b[i2];
740           for (j=0; j<nz; j++) {
741             xw[0] = x[vi[j]];
742             PetscKernel_v_gets_v_minus_A_times_w_1(s,(v+j),xw);
743           }
744           PetscKernel_v_gets_A_times_w_1(xw,idiag,s);
745           x[i2] += xw[0];
746           idiag -= 1;
747           i2    -= 1;
748         }
749         break;
750       case 2:
751         for (i=m-1; i>=0; i--) {
752           v  = aa + 4*ai[i];
753           vi = aj + ai[i];
754           nz = ai[i+1] - ai[i];
755           s[0] = b[i2]; s[1] = b[i2+1];
756           for (j=0; j<nz; j++) {
757             idx = 2*vi[j];
758             it  = 4*j;
759             xw[0] = x[idx]; xw[1] = x[1+idx];
760             PetscKernel_v_gets_v_minus_A_times_w_2(s,(v+it),xw);
761           }
762           PetscKernel_v_gets_A_times_w_2(xw,idiag,s);
763           x[i2]  += xw[0]; x[i2+1] += xw[1];
764           idiag  -= 4;
765           i2     -= 2;
766         }
767         break;
768       case 3:
769         for (i=m-1; i>=0; i--) {
770           v  = aa + 9*ai[i];
771           vi = aj + ai[i];
772           nz = ai[i+1] - ai[i];
773           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2];
774           while (nz--) {
775             idx = 3*(*vi++);
776             xw[0] = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx];
777             PetscKernel_v_gets_v_minus_A_times_w_3(s,v,xw);
778             v  += 9;
779           }
780           PetscKernel_v_gets_A_times_w_3(xw,idiag,s);
781           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
782           idiag  -= 9;
783           i2     -= 3;
784         }
785         break;
786       case 4:
787         for (i=m-1; i>=0; i--) {
788           v  = aa + 16*ai[i];
789           vi = aj + ai[i];
790           nz = ai[i+1] - ai[i];
791           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3];
792           while (nz--) {
793             idx = 4*(*vi++);
794             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx];
795             PetscKernel_v_gets_v_minus_A_times_w_4(s,v,xw);
796             v  += 16;
797           }
798           PetscKernel_v_gets_A_times_w_4(xw,idiag,s);
799           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3];
800           idiag  -= 16;
801           i2     -= 4;
802         }
803         break;
804       case 5:
805         for (i=m-1; i>=0; i--) {
806           v  = aa + 25*ai[i];
807           vi = aj + ai[i];
808           nz = ai[i+1] - ai[i];
809           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4];
810           while (nz--) {
811             idx = 5*(*vi++);
812             xw[0]  = x[idx]; xw[1] = x[1+idx]; xw[2] = x[2+idx]; xw[3] = x[3+idx]; xw[4] = x[4+idx];
813             PetscKernel_v_gets_v_minus_A_times_w_5(s,v,xw);
814             v  += 25;
815           }
816           PetscKernel_v_gets_A_times_w_5(xw,idiag,s);
817           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2]; x[i2+3] += xw[3]; x[i2+4] += xw[4];
818           idiag  -= 25;
819           i2     -= 5;
820         }
821         break;
822       case 6:
823         for (i=m-1; i>=0; i--) {
824           v  = aa + 36*ai[i];
825           vi = aj + ai[i];
826           nz = ai[i+1] - ai[i];
827           s[0] = b[i2]; s[1] = b[i2+1]; s[2] = b[i2+2]; s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5];
828           while (nz--) {
829             idx = 6*(*vi++);
830             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
831             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx];
832             PetscKernel_v_gets_v_minus_A_times_w_6(s,v,xw);
833             v  += 36;
834           }
835           PetscKernel_v_gets_A_times_w_6(xw,idiag,s);
836           x[i2] += xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
837           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5];
838           idiag  -= 36;
839           i2     -= 6;
840         }
841         break;
842       case 7:
843         for (i=m-1; i>=0; i--) {
844           v  = aa + 49*ai[i];
845           vi = aj + ai[i];
846           nz = ai[i+1] - ai[i];
847           s[0] = b[i2];   s[1] = b[i2+1]; s[2] = b[i2+2];
848           s[3] = b[i2+3]; s[4] = b[i2+4]; s[5] = b[i2+5]; s[6] = b[i2+6];
849           while (nz--) {
850             idx = 7*(*vi++);
851             xw[0] = x[idx];   xw[1] = x[1+idx]; xw[2] = x[2+idx];
852             xw[3] = x[3+idx]; xw[4] = x[4+idx]; xw[5] = x[5+idx]; xw[6] = x[6+idx];
853             PetscKernel_v_gets_v_minus_A_times_w_7(s,v,xw);
854             v  += 49;
855           }
856           PetscKernel_v_gets_A_times_w_7(xw,idiag,s);
857           x[i2] +=   xw[0]; x[i2+1] += xw[1]; x[i2+2] += xw[2];
858           x[i2+3] += xw[3]; x[i2+4] += xw[4]; x[i2+5] += xw[5]; x[i2+6] += xw[6];
859           idiag  -= 49;
860           i2     -= 7;
861         }
862         break;
863       default:
864         for (i=m-1; i>=0; i--) {
865           v  = aa + bs2*ai[i];
866           vi = aj + ai[i];
867           nz = ai[i+1] - ai[i];
868 
869           ierr = PetscArraycpy(w,b+i2,bs);CHKERRQ(ierr);
870           /* copy all rows of x that are needed into contiguous space */
871           workt = work;
872           for (j=0; j<nz; j++) {
873             ierr   = PetscArraycpy(workt,x + bs*(*vi++),bs);CHKERRQ(ierr);
874             workt += bs;
875           }
876           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,v,work);
877           PetscKernel_w_gets_w_plus_Ar_times_v(bs,bs,w,idiag,x+i2);
878 
879           idiag -= bs2;
880           i2    -= bs;
881         }
882         break;
883       }
884       ierr = PetscLogFlops(2.0*bs2*(a->nz));CHKERRQ(ierr);
885     }
886   }
887   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
888   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 /*
893     Special version for direct calls from Fortran (Used in PETSc-fun3d)
894 */
895 #if defined(PETSC_HAVE_FORTRAN_CAPS)
896 #define matsetvaluesblocked4_ MATSETVALUESBLOCKED4
897 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
898 #define matsetvaluesblocked4_ matsetvaluesblocked4
899 #endif
900 
901 PETSC_EXTERN void matsetvaluesblocked4_(Mat *AA,PetscInt *mm,const PetscInt im[],PetscInt *nn,const PetscInt in[],const PetscScalar v[])
902 {
903   Mat               A  = *AA;
904   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
905   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,N,m = *mm,n = *nn;
906   PetscInt          *ai    =a->i,*ailen=a->ilen;
907   PetscInt          *aj    =a->j,stepval,lastcol = -1;
908   const PetscScalar *value = v;
909   MatScalar         *ap,*aa = a->a,*bap;
910   PetscErrorCode    ierr;
911 
912   PetscFunctionBegin;
913   if (A->rmap->bs != 4) SETERRABORT(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Can only be called with a block size of 4");
914   stepval = (n-1)*4;
915   for (k=0; k<m; k++) { /* loop over added rows */
916     row  = im[k];
917     rp   = aj + ai[row];
918     ap   = aa + 16*ai[row];
919     nrow = ailen[row];
920     low  = 0;
921     high = nrow;
922     for (l=0; l<n; l++) { /* loop over added columns */
923       col = in[l];
924       if (col <= lastcol)  low = 0;
925       else                high = nrow;
926       lastcol = col;
927       value   = v + k*(stepval+4 + l)*4;
928       while (high-low > 7) {
929         t = (low+high)/2;
930         if (rp[t] > col) high = t;
931         else             low  = t;
932       }
933       for (i=low; i<high; i++) {
934         if (rp[i] > col) break;
935         if (rp[i] == col) {
936           bap = ap +  16*i;
937           for (ii=0; ii<4; ii++,value+=stepval) {
938             for (jj=ii; jj<16; jj+=4) {
939               bap[jj] += *value++;
940             }
941           }
942           goto noinsert2;
943         }
944       }
945       N = nrow++ - 1;
946       high++; /* added new column index thus must search to one higher than before */
947       /* shift up all the later entries in this row */
948       for (ii=N; ii>=i; ii--) {
949         rp[ii+1] = rp[ii];
950         ierr = PetscArraycpy(ap+16*(ii+1),ap+16*(ii),16);CHKERRV(ierr);
951       }
952       if (N >= i) {
953         ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
954       }
955       rp[i] = col;
956       bap   = ap +  16*i;
957       for (ii=0; ii<4; ii++,value+=stepval) {
958         for (jj=ii; jj<16; jj+=4) {
959           bap[jj] = *value++;
960         }
961       }
962       noinsert2:;
963       low = i;
964     }
965     ailen[row] = nrow;
966   }
967   PetscFunctionReturnVoid();
968 }
969 
970 #if defined(PETSC_HAVE_FORTRAN_CAPS)
971 #define matsetvalues4_ MATSETVALUES4
972 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
973 #define matsetvalues4_ matsetvalues4
974 #endif
975 
976 PETSC_EXTERN void matsetvalues4_(Mat *AA,PetscInt *mm,PetscInt *im,PetscInt *nn,PetscInt *in,PetscScalar *v)
977 {
978   Mat         A  = *AA;
979   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
980   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,N,n = *nn,m = *mm;
981   PetscInt    *ai=a->i,*ailen=a->ilen;
982   PetscInt    *aj=a->j,brow,bcol;
983   PetscInt    ridx,cidx,lastcol = -1;
984   MatScalar   *ap,value,*aa=a->a,*bap;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   for (k=0; k<m; k++) { /* loop over added rows */
989     row  = im[k]; brow = row/4;
990     rp   = aj + ai[brow];
991     ap   = aa + 16*ai[brow];
992     nrow = ailen[brow];
993     low  = 0;
994     high = nrow;
995     for (l=0; l<n; l++) { /* loop over added columns */
996       col   = in[l]; bcol = col/4;
997       ridx  = row % 4; cidx = col % 4;
998       value = v[l + k*n];
999       if (col <= lastcol)  low = 0;
1000       else                high = nrow;
1001       lastcol = col;
1002       while (high-low > 7) {
1003         t = (low+high)/2;
1004         if (rp[t] > bcol) high = t;
1005         else              low  = t;
1006       }
1007       for (i=low; i<high; i++) {
1008         if (rp[i] > bcol) break;
1009         if (rp[i] == bcol) {
1010           bap   = ap +  16*i + 4*cidx + ridx;
1011           *bap += value;
1012           goto noinsert1;
1013         }
1014       }
1015       N = nrow++ - 1;
1016       high++; /* added new column thus must search to one higher than before */
1017       /* shift up all the later entries in this row */
1018       ierr = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRV(ierr);
1019       ierr = PetscArraymove(ap+16*i+16,ap+16*i,16*(N-i+1));CHKERRV(ierr);
1020       ierr = PetscArrayzero(ap+16*i,16);CHKERRV(ierr);
1021       rp[i]                    = bcol;
1022       ap[16*i + 4*cidx + ridx] = value;
1023 noinsert1:;
1024       low = i;
1025     }
1026     ailen[brow] = nrow;
1027   }
1028   PetscFunctionReturnVoid();
1029 }
1030 
1031 /*
1032      Checks for missing diagonals
1033 */
1034 PetscErrorCode MatMissingDiagonal_SeqBAIJ(Mat A,PetscBool  *missing,PetscInt *d)
1035 {
1036   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038   PetscInt       *diag,*ii = a->i,i;
1039 
1040   PetscFunctionBegin;
1041   ierr     = MatMarkDiagonal_SeqBAIJ(A);CHKERRQ(ierr);
1042   *missing = PETSC_FALSE;
1043   if (A->rmap->n > 0 && !ii) {
1044     *missing = PETSC_TRUE;
1045     if (d) *d = 0;
1046     ierr = PetscInfo(A,"Matrix has no entries therefore is missing diagonal\n");CHKERRQ(ierr);
1047   } else {
1048     PetscInt n;
1049     n = PetscMin(a->mbs, a->nbs);
1050     diag = a->diag;
1051     for (i=0; i<n; i++) {
1052       if (diag[i] >= ii[i+1]) {
1053         *missing = PETSC_TRUE;
1054         if (d) *d = i;
1055         ierr = PetscInfo1(A,"Matrix is missing block diagonal number %D\n",i);CHKERRQ(ierr);
1056         break;
1057       }
1058     }
1059   }
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 PetscErrorCode MatMarkDiagonal_SeqBAIJ(Mat A)
1064 {
1065   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1066   PetscErrorCode ierr;
1067   PetscInt       i,j,m = a->mbs;
1068 
1069   PetscFunctionBegin;
1070   if (!a->diag) {
1071     ierr         = PetscMalloc1(m,&a->diag);CHKERRQ(ierr);
1072     ierr         = PetscLogObjectMemory((PetscObject)A,m*sizeof(PetscInt));CHKERRQ(ierr);
1073     a->free_diag = PETSC_TRUE;
1074   }
1075   for (i=0; i<m; i++) {
1076     a->diag[i] = a->i[i+1];
1077     for (j=a->i[i]; j<a->i[i+1]; j++) {
1078       if (a->j[j] == i) {
1079         a->diag[i] = j;
1080         break;
1081       }
1082     }
1083   }
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 static PetscErrorCode MatGetRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *inia[],const PetscInt *inja[],PetscBool  *done)
1088 {
1089   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1090   PetscErrorCode ierr;
1091   PetscInt       i,j,n = a->mbs,nz = a->i[n],*tia,*tja,bs = A->rmap->bs,k,l,cnt;
1092   PetscInt       **ia = (PetscInt**)inia,**ja = (PetscInt**)inja;
1093 
1094   PetscFunctionBegin;
1095   *nn = n;
1096   if (!ia) PetscFunctionReturn(0);
1097   if (symmetric) {
1098     ierr = MatToSymmetricIJ_SeqAIJ(n,a->i,a->j,PETSC_TRUE,0,0,&tia,&tja);CHKERRQ(ierr);
1099     nz   = tia[n];
1100   } else {
1101     tia = a->i; tja = a->j;
1102   }
1103 
1104   if (!blockcompressed && bs > 1) {
1105     (*nn) *= bs;
1106     /* malloc & create the natural set of indices */
1107     ierr = PetscMalloc1((n+1)*bs,ia);CHKERRQ(ierr);
1108     if (n) {
1109       (*ia)[0] = oshift;
1110       for (j=1; j<bs; j++) {
1111         (*ia)[j] = (tia[1]-tia[0])*bs+(*ia)[j-1];
1112       }
1113     }
1114 
1115     for (i=1; i<n; i++) {
1116       (*ia)[i*bs] = (tia[i]-tia[i-1])*bs + (*ia)[i*bs-1];
1117       for (j=1; j<bs; j++) {
1118         (*ia)[i*bs+j] = (tia[i+1]-tia[i])*bs + (*ia)[i*bs+j-1];
1119       }
1120     }
1121     if (n) {
1122       (*ia)[n*bs] = (tia[n]-tia[n-1])*bs + (*ia)[n*bs-1];
1123     }
1124 
1125     if (inja) {
1126       ierr = PetscMalloc1(nz*bs*bs,ja);CHKERRQ(ierr);
1127       cnt = 0;
1128       for (i=0; i<n; i++) {
1129         for (j=0; j<bs; j++) {
1130           for (k=tia[i]; k<tia[i+1]; k++) {
1131             for (l=0; l<bs; l++) {
1132               (*ja)[cnt++] = bs*tja[k] + l;
1133             }
1134           }
1135         }
1136       }
1137     }
1138 
1139     if (symmetric) { /* deallocate memory allocated in MatToSymmetricIJ_SeqAIJ() */
1140       ierr = PetscFree(tia);CHKERRQ(ierr);
1141       ierr = PetscFree(tja);CHKERRQ(ierr);
1142     }
1143   } else if (oshift == 1) {
1144     if (symmetric) {
1145       nz = tia[A->rmap->n/bs];
1146       /*  add 1 to i and j indices */
1147       for (i=0; i<A->rmap->n/bs+1; i++) tia[i] = tia[i] + 1;
1148       *ia = tia;
1149       if (ja) {
1150         for (i=0; i<nz; i++) tja[i] = tja[i] + 1;
1151         *ja = tja;
1152       }
1153     } else {
1154       nz = a->i[A->rmap->n/bs];
1155       /* malloc space and  add 1 to i and j indices */
1156       ierr = PetscMalloc1(A->rmap->n/bs+1,ia);CHKERRQ(ierr);
1157       for (i=0; i<A->rmap->n/bs+1; i++) (*ia)[i] = a->i[i] + 1;
1158       if (ja) {
1159         ierr = PetscMalloc1(nz,ja);CHKERRQ(ierr);
1160         for (i=0; i<nz; i++) (*ja)[i] = a->j[i] + 1;
1161       }
1162     }
1163   } else {
1164     *ia = tia;
1165     if (ja) *ja = tja;
1166   }
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 static PetscErrorCode MatRestoreRowIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool blockcompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
1171 {
1172   PetscErrorCode ierr;
1173 
1174   PetscFunctionBegin;
1175   if (!ia) PetscFunctionReturn(0);
1176   if ((!blockcompressed && A->rmap->bs > 1) || (symmetric || oshift == 1)) {
1177     ierr = PetscFree(*ia);CHKERRQ(ierr);
1178     if (ja) {ierr = PetscFree(*ja);CHKERRQ(ierr);}
1179   }
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 PetscErrorCode MatDestroy_SeqBAIJ(Mat A)
1184 {
1185   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1186   PetscErrorCode ierr;
1187 
1188   PetscFunctionBegin;
1189 #if defined(PETSC_USE_LOG)
1190   PetscLogObjectState((PetscObject)A,"Rows=%D, Cols=%D, NZ=%D",A->rmap->N,A->cmap->n,a->nz);
1191 #endif
1192   ierr = MatSeqXAIJFreeAIJ(A,&a->a,&a->j,&a->i);CHKERRQ(ierr);
1193   ierr = ISDestroy(&a->row);CHKERRQ(ierr);
1194   ierr = ISDestroy(&a->col);CHKERRQ(ierr);
1195   if (a->free_diag) {ierr = PetscFree(a->diag);CHKERRQ(ierr);}
1196   ierr = PetscFree(a->idiag);CHKERRQ(ierr);
1197   if (a->free_imax_ilen) {ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);}
1198   ierr = PetscFree(a->solve_work);CHKERRQ(ierr);
1199   ierr = PetscFree(a->mult_work);CHKERRQ(ierr);
1200   ierr = PetscFree(a->sor_workt);CHKERRQ(ierr);
1201   ierr = PetscFree(a->sor_work);CHKERRQ(ierr);
1202   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
1203   ierr = PetscFree(a->saved_values);CHKERRQ(ierr);
1204   ierr = PetscFree2(a->compressedrow.i,a->compressedrow.rindex);CHKERRQ(ierr);
1205 
1206   ierr = MatDestroy(&a->sbaijMat);CHKERRQ(ierr);
1207   ierr = MatDestroy(&a->parent);CHKERRQ(ierr);
1208   ierr = PetscFree(A->data);CHKERRQ(ierr);
1209 
1210   ierr = PetscObjectChangeTypeName((PetscObject)A,NULL);CHKERRQ(ierr);
1211   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJGetArray_C",NULL);CHKERRQ(ierr);
1212   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJRestoreArray_C",NULL);CHKERRQ(ierr);
1213   ierr = PetscObjectComposeFunction((PetscObject)A,"MatInvertBlockDiagonal_C",NULL);CHKERRQ(ierr);
1214   ierr = PetscObjectComposeFunction((PetscObject)A,"MatStoreValues_C",NULL);CHKERRQ(ierr);
1215   ierr = PetscObjectComposeFunction((PetscObject)A,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
1216   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetColumnIndices_C",NULL);CHKERRQ(ierr);
1217   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqaij_C",NULL);CHKERRQ(ierr);
1218   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqsbaij_C",NULL);CHKERRQ(ierr);
1219   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
1220   ierr = PetscObjectComposeFunction((PetscObject)A,"MatSeqBAIJSetPreallocationCSR_C",NULL);CHKERRQ(ierr);
1221   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_seqbstrm_C",NULL);CHKERRQ(ierr);
1222   ierr = PetscObjectComposeFunction((PetscObject)A,"MatIsTranspose_C",NULL);CHKERRQ(ierr);
1223 #if defined(PETSC_HAVE_HYPRE)
1224   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_hypre_C",NULL);CHKERRQ(ierr);
1225 #endif
1226   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqbaij_is_C",NULL);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 PetscErrorCode MatSetOption_SeqBAIJ(Mat A,MatOption op,PetscBool flg)
1231 {
1232   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1233   PetscErrorCode ierr;
1234 
1235   PetscFunctionBegin;
1236   switch (op) {
1237   case MAT_ROW_ORIENTED:
1238     a->roworiented = flg;
1239     break;
1240   case MAT_KEEP_NONZERO_PATTERN:
1241     a->keepnonzeropattern = flg;
1242     break;
1243   case MAT_NEW_NONZERO_LOCATIONS:
1244     a->nonew = (flg ? 0 : 1);
1245     break;
1246   case MAT_NEW_NONZERO_LOCATION_ERR:
1247     a->nonew = (flg ? -1 : 0);
1248     break;
1249   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1250     a->nonew = (flg ? -2 : 0);
1251     break;
1252   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1253     a->nounused = (flg ? -1 : 0);
1254     break;
1255   case MAT_FORCE_DIAGONAL_ENTRIES:
1256   case MAT_IGNORE_OFF_PROC_ENTRIES:
1257   case MAT_USE_HASH_TABLE:
1258   case MAT_SORTED_FULL:
1259     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1260     break;
1261   case MAT_SPD:
1262   case MAT_SYMMETRIC:
1263   case MAT_STRUCTURALLY_SYMMETRIC:
1264   case MAT_HERMITIAN:
1265   case MAT_SYMMETRY_ETERNAL:
1266   case MAT_SUBMAT_SINGLEIS:
1267   case MAT_STRUCTURE_ONLY:
1268     /* These options are handled directly by MatSetOption() */
1269     break;
1270   default:
1271     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 /* used for both SeqBAIJ and SeqSBAIJ matrices */
1277 PetscErrorCode MatGetRow_SeqBAIJ_private(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v,PetscInt *ai,PetscInt *aj,PetscScalar *aa)
1278 {
1279   PetscErrorCode ierr;
1280   PetscInt       itmp,i,j,k,M,bn,bp,*idx_i,bs,bs2;
1281   MatScalar      *aa_i;
1282   PetscScalar    *v_i;
1283 
1284   PetscFunctionBegin;
1285   bs  = A->rmap->bs;
1286   bs2 = bs*bs;
1287   if (row < 0 || row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range", row);
1288 
1289   bn  = row/bs;   /* Block number */
1290   bp  = row % bs; /* Block Position */
1291   M   = ai[bn+1] - ai[bn];
1292   *nz = bs*M;
1293 
1294   if (v) {
1295     *v = NULL;
1296     if (*nz) {
1297       ierr = PetscMalloc1(*nz,v);CHKERRQ(ierr);
1298       for (i=0; i<M; i++) { /* for each block in the block row */
1299         v_i  = *v + i*bs;
1300         aa_i = aa + bs2*(ai[bn] + i);
1301         for (j=bp,k=0; j<bs2; j+=bs,k++) v_i[k] = aa_i[j];
1302       }
1303     }
1304   }
1305 
1306   if (idx) {
1307     *idx = NULL;
1308     if (*nz) {
1309       ierr = PetscMalloc1(*nz,idx);CHKERRQ(ierr);
1310       for (i=0; i<M; i++) { /* for each block in the block row */
1311         idx_i = *idx + i*bs;
1312         itmp  = bs*aj[ai[bn] + i];
1313         for (j=0; j<bs; j++) idx_i[j] = itmp++;
1314       }
1315     }
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode MatGetRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1321 {
1322   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1323   PetscErrorCode ierr;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatGetRow_SeqBAIJ_private(A,row,nz,idx,v,a->i,a->j,a->a);CHKERRQ(ierr);
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 PetscErrorCode MatRestoreRow_SeqBAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1331 {
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   if (nz)  *nz = 0;
1336   if (idx) {ierr = PetscFree(*idx);CHKERRQ(ierr);}
1337   if (v)   {ierr = PetscFree(*v);CHKERRQ(ierr);}
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 PetscErrorCode MatTranspose_SeqBAIJ(Mat A,MatReuse reuse,Mat *B)
1342 {
1343   Mat_SeqBAIJ    *a=(Mat_SeqBAIJ*)A->data,*at;
1344   Mat            C;
1345   PetscErrorCode ierr;
1346   PetscInt       i,j,k,*aj=a->j,*ai=a->i,bs=A->rmap->bs,mbs=a->mbs,nbs=a->nbs,*atfill;
1347   PetscInt       bs2=a->bs2,*ati,*atj,anzj,kr;
1348   MatScalar      *ata,*aa=a->a;
1349 
1350   PetscFunctionBegin;
1351   ierr = PetscCalloc1(1+nbs,&atfill);CHKERRQ(ierr);
1352   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
1353     for (i=0; i<ai[mbs]; i++) atfill[aj[i]] += 1; /* count num of non-zeros in row aj[i] */
1354 
1355     ierr = MatCreate(PetscObjectComm((PetscObject)A),&C);CHKERRQ(ierr);
1356     ierr = MatSetSizes(C,A->cmap->n,A->rmap->N,A->cmap->n,A->rmap->N);CHKERRQ(ierr);
1357     ierr = MatSetType(C,((PetscObject)A)->type_name);CHKERRQ(ierr);
1358     ierr = MatSeqBAIJSetPreallocation(C,bs,0,atfill);CHKERRQ(ierr);
1359 
1360     at  = (Mat_SeqBAIJ*)C->data;
1361     ati = at->i;
1362     for (i=0; i<nbs; i++) at->ilen[i] = at->imax[i] = ati[i+1] - ati[i];
1363   } else {
1364     C = *B;
1365     at = (Mat_SeqBAIJ*)C->data;
1366     ati = at->i;
1367   }
1368 
1369   atj = at->j;
1370   ata = at->a;
1371 
1372   /* Copy ati into atfill so we have locations of the next free space in atj */
1373   ierr = PetscArraycpy(atfill,ati,nbs);CHKERRQ(ierr);
1374 
1375   /* Walk through A row-wise and mark nonzero entries of A^T. */
1376   for (i=0; i<mbs; i++) {
1377     anzj = ai[i+1] - ai[i];
1378     for (j=0; j<anzj; j++) {
1379       atj[atfill[*aj]] = i;
1380       for (kr=0; kr<bs; kr++) {
1381         for (k=0; k<bs; k++) {
1382           ata[bs2*atfill[*aj]+k*bs+kr] = *aa++;
1383         }
1384       }
1385       atfill[*aj++] += 1;
1386     }
1387   }
1388   ierr = MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1389   ierr = MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1390 
1391   /* Clean up temporary space and complete requests. */
1392   ierr = PetscFree(atfill);CHKERRQ(ierr);
1393 
1394   if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
1395     ierr = MatSetBlockSizes(C,PetscAbs(A->cmap->bs),PetscAbs(A->rmap->bs));CHKERRQ(ierr);
1396     *B = C;
1397   } else {
1398     ierr = MatHeaderMerge(A,&C);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 PetscErrorCode MatIsTranspose_SeqBAIJ(Mat A,Mat B,PetscReal tol,PetscBool  *f)
1404 {
1405   PetscErrorCode ierr;
1406   Mat            Btrans;
1407 
1408   PetscFunctionBegin;
1409   *f   = PETSC_FALSE;
1410   ierr = MatTranspose_SeqBAIJ(A,MAT_INITIAL_MATRIX,&Btrans);CHKERRQ(ierr);
1411   ierr = MatEqual_SeqBAIJ(B,Btrans,f);CHKERRQ(ierr);
1412   ierr = MatDestroy(&Btrans);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
1417 PetscErrorCode MatView_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
1418 {
1419   Mat_SeqBAIJ    *A = (Mat_SeqBAIJ*)mat->data;
1420   PetscInt       header[4],M,N,m,bs,nz,cnt,i,j,k,l;
1421   PetscInt       *rowlens,*colidxs;
1422   PetscScalar    *matvals;
1423   PetscErrorCode ierr;
1424 
1425   PetscFunctionBegin;
1426   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
1427 
1428   M  = mat->rmap->N;
1429   N  = mat->cmap->N;
1430   m  = mat->rmap->n;
1431   bs = mat->rmap->bs;
1432   nz = bs*bs*A->nz;
1433 
1434   /* write matrix header */
1435   header[0] = MAT_FILE_CLASSID;
1436   header[1] = M; header[2] = N; header[3] = nz;
1437   ierr = PetscViewerBinaryWrite(viewer,header,4,PETSC_INT);CHKERRQ(ierr);
1438 
1439   /* store row lengths */
1440   ierr = PetscMalloc1(m,&rowlens);CHKERRQ(ierr);
1441   for (cnt=0, i=0; i<A->mbs; i++)
1442     for (j=0; j<bs; j++)
1443       rowlens[cnt++] = bs*(A->i[i+1] - A->i[i]);
1444   ierr = PetscViewerBinaryWrite(viewer,rowlens,m,PETSC_INT);CHKERRQ(ierr);
1445   ierr = PetscFree(rowlens);CHKERRQ(ierr);
1446 
1447   /* store column indices  */
1448   ierr = PetscMalloc1(nz,&colidxs);CHKERRQ(ierr);
1449   for (cnt=0, i=0; i<A->mbs; i++)
1450     for (k=0; k<bs; k++)
1451       for (j=A->i[i]; j<A->i[i+1]; j++)
1452         for (l=0; l<bs; l++)
1453           colidxs[cnt++] = bs*A->j[j] + l;
1454   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1455   ierr = PetscViewerBinaryWrite(viewer,colidxs,nz,PETSC_INT);CHKERRQ(ierr);
1456   ierr = PetscFree(colidxs);CHKERRQ(ierr);
1457 
1458   /* store nonzero values */
1459   ierr = PetscMalloc1(nz,&matvals);CHKERRQ(ierr);
1460   for (cnt=0, i=0; i<A->mbs; i++)
1461     for (k=0; k<bs; k++)
1462       for (j=A->i[i]; j<A->i[i+1]; j++)
1463         for (l=0; l<bs; l++)
1464           matvals[cnt++] = A->a[bs*(bs*j + l) + k];
1465   if (cnt != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_LIB,"Internal PETSc error: cnt = %D nz = %D",cnt,nz);
1466   ierr = PetscViewerBinaryWrite(viewer,matvals,nz,PETSC_SCALAR);CHKERRQ(ierr);
1467   ierr = PetscFree(matvals);CHKERRQ(ierr);
1468 
1469   /* write block size option to the viewer's .info file */
1470   ierr = MatView_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 static PetscErrorCode MatView_SeqBAIJ_ASCII_structonly(Mat A,PetscViewer viewer)
1475 {
1476   PetscErrorCode ierr;
1477   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
1478   PetscInt       i,bs = A->rmap->bs,k;
1479 
1480   PetscFunctionBegin;
1481   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1482   for (i=0; i<a->mbs; i++) {
1483     ierr = PetscViewerASCIIPrintf(viewer,"row %D-%D:",i*bs,i*bs+bs-1);CHKERRQ(ierr);
1484     for (k=a->i[i]; k<a->i[i+1]; k++) {
1485       ierr = PetscViewerASCIIPrintf(viewer," (%D-%D) ",bs*a->j[k],bs*a->j[k]+bs-1);CHKERRQ(ierr);
1486     }
1487     ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1488   }
1489   ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1490   PetscFunctionReturn(0);
1491 }
1492 
1493 static PetscErrorCode MatView_SeqBAIJ_ASCII(Mat A,PetscViewer viewer)
1494 {
1495   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1496   PetscErrorCode    ierr;
1497   PetscInt          i,j,bs = A->rmap->bs,k,l,bs2=a->bs2;
1498   PetscViewerFormat format;
1499 
1500   PetscFunctionBegin;
1501   if (A->structure_only) {
1502     ierr = MatView_SeqBAIJ_ASCII_structonly(A,viewer);CHKERRQ(ierr);
1503     PetscFunctionReturn(0);
1504   }
1505 
1506   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1507   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
1508     ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
1509   } else if (format == PETSC_VIEWER_ASCII_MATLAB) {
1510     const char *matname;
1511     Mat        aij;
1512     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&aij);CHKERRQ(ierr);
1513     ierr = PetscObjectGetName((PetscObject)A,&matname);CHKERRQ(ierr);
1514     ierr = PetscObjectSetName((PetscObject)aij,matname);CHKERRQ(ierr);
1515     ierr = MatView(aij,viewer);CHKERRQ(ierr);
1516     ierr = MatDestroy(&aij);CHKERRQ(ierr);
1517   } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
1518       PetscFunctionReturn(0);
1519   } else if (format == PETSC_VIEWER_ASCII_COMMON) {
1520     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1521     for (i=0; i<a->mbs; i++) {
1522       for (j=0; j<bs; j++) {
1523         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1524         for (k=a->i[i]; k<a->i[i+1]; k++) {
1525           for (l=0; l<bs; l++) {
1526 #if defined(PETSC_USE_COMPLEX)
1527             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1528               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %gi) ",bs*a->j[k]+l,
1529                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1530             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0 && PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1531               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %gi) ",bs*a->j[k]+l,
1532                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1533             } else if (PetscRealPart(a->a[bs2*k + l*bs + j]) != 0.0) {
1534               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1535             }
1536 #else
1537             if (a->a[bs2*k + l*bs + j] != 0.0) {
1538               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1539             }
1540 #endif
1541           }
1542         }
1543         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1544       }
1545     }
1546     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1547   } else {
1548     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);CHKERRQ(ierr);
1549     for (i=0; i<a->mbs; i++) {
1550       for (j=0; j<bs; j++) {
1551         ierr = PetscViewerASCIIPrintf(viewer,"row %D:",i*bs+j);CHKERRQ(ierr);
1552         for (k=a->i[i]; k<a->i[i+1]; k++) {
1553           for (l=0; l<bs; l++) {
1554 #if defined(PETSC_USE_COMPLEX)
1555             if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) > 0.0) {
1556               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g + %g i) ",bs*a->j[k]+l,
1557                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1558             } else if (PetscImaginaryPart(a->a[bs2*k + l*bs + j]) < 0.0) {
1559               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g - %g i) ",bs*a->j[k]+l,
1560                                             (double)PetscRealPart(a->a[bs2*k + l*bs + j]),-(double)PetscImaginaryPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1561             } else {
1562               ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)PetscRealPart(a->a[bs2*k + l*bs + j]));CHKERRQ(ierr);
1563             }
1564 #else
1565             ierr = PetscViewerASCIIPrintf(viewer," (%D, %g) ",bs*a->j[k]+l,(double)a->a[bs2*k + l*bs + j]);CHKERRQ(ierr);
1566 #endif
1567           }
1568         }
1569         ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
1570       }
1571     }
1572     ierr = PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);CHKERRQ(ierr);
1573   }
1574   ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
1575   PetscFunctionReturn(0);
1576 }
1577 
1578 #include <petscdraw.h>
1579 static PetscErrorCode MatView_SeqBAIJ_Draw_Zoom(PetscDraw draw,void *Aa)
1580 {
1581   Mat               A = (Mat) Aa;
1582   Mat_SeqBAIJ       *a=(Mat_SeqBAIJ*)A->data;
1583   PetscErrorCode    ierr;
1584   PetscInt          row,i,j,k,l,mbs=a->mbs,color,bs=A->rmap->bs,bs2=a->bs2;
1585   PetscReal         xl,yl,xr,yr,x_l,x_r,y_l,y_r;
1586   MatScalar         *aa;
1587   PetscViewer       viewer;
1588   PetscViewerFormat format;
1589 
1590   PetscFunctionBegin;
1591   ierr = PetscObjectQuery((PetscObject)A,"Zoomviewer",(PetscObject*)&viewer);CHKERRQ(ierr);
1592   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
1593   ierr = PetscDrawGetCoordinates(draw,&xl,&yl,&xr,&yr);CHKERRQ(ierr);
1594 
1595   /* loop over matrix elements drawing boxes */
1596 
1597   if (format != PETSC_VIEWER_DRAW_CONTOUR) {
1598     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1599     /* Blue for negative, Cyan for zero and  Red for positive */
1600     color = PETSC_DRAW_BLUE;
1601     for (i=0,row=0; i<mbs; i++,row+=bs) {
1602       for (j=a->i[i]; j<a->i[i+1]; j++) {
1603         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1604         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1605         aa  = a->a + j*bs2;
1606         for (k=0; k<bs; k++) {
1607           for (l=0; l<bs; l++) {
1608             if (PetscRealPart(*aa++) >=  0.) continue;
1609             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1610           }
1611         }
1612       }
1613     }
1614     color = PETSC_DRAW_CYAN;
1615     for (i=0,row=0; i<mbs; i++,row+=bs) {
1616       for (j=a->i[i]; j<a->i[i+1]; j++) {
1617         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1618         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1619         aa  = a->a + j*bs2;
1620         for (k=0; k<bs; k++) {
1621           for (l=0; l<bs; l++) {
1622             if (PetscRealPart(*aa++) != 0.) continue;
1623             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1624           }
1625         }
1626       }
1627     }
1628     color = PETSC_DRAW_RED;
1629     for (i=0,row=0; i<mbs; i++,row+=bs) {
1630       for (j=a->i[i]; j<a->i[i+1]; j++) {
1631         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1632         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1633         aa  = a->a + j*bs2;
1634         for (k=0; k<bs; k++) {
1635           for (l=0; l<bs; l++) {
1636             if (PetscRealPart(*aa++) <= 0.) continue;
1637             ierr = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1638           }
1639         }
1640       }
1641     }
1642     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1643   } else {
1644     /* use contour shading to indicate magnitude of values */
1645     /* first determine max of all nonzero values */
1646     PetscReal minv = 0.0, maxv = 0.0;
1647     PetscDraw popup;
1648 
1649     for (i=0; i<a->nz*a->bs2; i++) {
1650       if (PetscAbsScalar(a->a[i]) > maxv) maxv = PetscAbsScalar(a->a[i]);
1651     }
1652     if (minv >= maxv) maxv = minv + PETSC_SMALL;
1653     ierr = PetscDrawGetPopup(draw,&popup);CHKERRQ(ierr);
1654     ierr = PetscDrawScalePopup(popup,0.0,maxv);CHKERRQ(ierr);
1655 
1656     ierr = PetscDrawCollectiveBegin(draw);CHKERRQ(ierr);
1657     for (i=0,row=0; i<mbs; i++,row+=bs) {
1658       for (j=a->i[i]; j<a->i[i+1]; j++) {
1659         y_l = A->rmap->N - row - 1.0; y_r = y_l + 1.0;
1660         x_l = a->j[j]*bs; x_r = x_l + 1.0;
1661         aa  = a->a + j*bs2;
1662         for (k=0; k<bs; k++) {
1663           for (l=0; l<bs; l++) {
1664             MatScalar v = *aa++;
1665             color = PetscDrawRealToColor(PetscAbsScalar(v),minv,maxv);
1666             ierr  = PetscDrawRectangle(draw,x_l+k,y_l-l,x_r+k,y_r-l,color,color,color,color);CHKERRQ(ierr);
1667           }
1668         }
1669       }
1670     }
1671     ierr = PetscDrawCollectiveEnd(draw);CHKERRQ(ierr);
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 
1676 static PetscErrorCode MatView_SeqBAIJ_Draw(Mat A,PetscViewer viewer)
1677 {
1678   PetscErrorCode ierr;
1679   PetscReal      xl,yl,xr,yr,w,h;
1680   PetscDraw      draw;
1681   PetscBool      isnull;
1682 
1683   PetscFunctionBegin;
1684   ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
1685   ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr);
1686   if (isnull) PetscFunctionReturn(0);
1687 
1688   xr   = A->cmap->n; yr = A->rmap->N; h = yr/10.0; w = xr/10.0;
1689   xr  += w;          yr += h;        xl = -w;     yl = -h;
1690   ierr = PetscDrawSetCoordinates(draw,xl,yl,xr,yr);CHKERRQ(ierr);
1691   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",(PetscObject)viewer);CHKERRQ(ierr);
1692   ierr = PetscDrawZoom(draw,MatView_SeqBAIJ_Draw_Zoom,A);CHKERRQ(ierr);
1693   ierr = PetscObjectCompose((PetscObject)A,"Zoomviewer",NULL);CHKERRQ(ierr);
1694   ierr = PetscDrawSave(draw);CHKERRQ(ierr);
1695   PetscFunctionReturn(0);
1696 }
1697 
1698 PetscErrorCode MatView_SeqBAIJ(Mat A,PetscViewer viewer)
1699 {
1700   PetscErrorCode ierr;
1701   PetscBool      iascii,isbinary,isdraw;
1702 
1703   PetscFunctionBegin;
1704   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
1705   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
1706   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
1707   if (iascii) {
1708     ierr = MatView_SeqBAIJ_ASCII(A,viewer);CHKERRQ(ierr);
1709   } else if (isbinary) {
1710     ierr = MatView_SeqBAIJ_Binary(A,viewer);CHKERRQ(ierr);
1711   } else if (isdraw) {
1712     ierr = MatView_SeqBAIJ_Draw(A,viewer);CHKERRQ(ierr);
1713   } else {
1714     Mat B;
1715     ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
1716     ierr = MatView(B,viewer);CHKERRQ(ierr);
1717     ierr = MatDestroy(&B);CHKERRQ(ierr);
1718   }
1719   PetscFunctionReturn(0);
1720 }
1721 
1722 PetscErrorCode MatGetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],PetscScalar v[])
1723 {
1724   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
1725   PetscInt    *rp,k,low,high,t,row,nrow,i,col,l,*aj = a->j;
1726   PetscInt    *ai = a->i,*ailen = a->ilen;
1727   PetscInt    brow,bcol,ridx,cidx,bs=A->rmap->bs,bs2=a->bs2;
1728   MatScalar   *ap,*aa = a->a;
1729 
1730   PetscFunctionBegin;
1731   for (k=0; k<m; k++) { /* loop over rows */
1732     row = im[k]; brow = row/bs;
1733     if (row < 0) {v += n; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row"); */
1734     if (row >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D too large", row);
1735     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow];
1736     nrow = ailen[brow];
1737     for (l=0; l<n; l++) { /* loop over columns */
1738       if (in[l] < 0) {v++; continue;} /* SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column"); */
1739       if (in[l] >= A->cmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column %D too large", in[l]);
1740       col  = in[l];
1741       bcol = col/bs;
1742       cidx = col%bs;
1743       ridx = row%bs;
1744       high = nrow;
1745       low  = 0; /* assume unsorted */
1746       while (high-low > 5) {
1747         t = (low+high)/2;
1748         if (rp[t] > bcol) high = t;
1749         else             low  = t;
1750       }
1751       for (i=low; i<high; i++) {
1752         if (rp[i] > bcol) break;
1753         if (rp[i] == bcol) {
1754           *v++ = ap[bs2*i+bs*cidx+ridx];
1755           goto finished;
1756         }
1757       }
1758       *v++ = 0.0;
1759 finished:;
1760     }
1761   }
1762   PetscFunctionReturn(0);
1763 }
1764 
1765 PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
1766 {
1767   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ*)A->data;
1768   PetscInt          *rp,k,low,high,t,ii,jj,row,nrow,i,col,l,rmax,N,lastcol = -1;
1769   PetscInt          *imax=a->imax,*ai=a->i,*ailen=a->ilen;
1770   PetscErrorCode    ierr;
1771   PetscInt          *aj        =a->j,nonew=a->nonew,bs2=a->bs2,bs=A->rmap->bs,stepval;
1772   PetscBool         roworiented=a->roworiented;
1773   const PetscScalar *value     = v;
1774   MatScalar         *ap=NULL,*aa = a->a,*bap;
1775 
1776   PetscFunctionBegin;
1777   if (roworiented) {
1778     stepval = (n-1)*bs;
1779   } else {
1780     stepval = (m-1)*bs;
1781   }
1782   for (k=0; k<m; k++) { /* loop over added rows */
1783     row = im[k];
1784     if (row < 0) continue;
1785     if (PetscUnlikelyDebug(row >= a->mbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block row index too large %D max %D",row,a->mbs-1);
1786     rp   = aj + ai[row];
1787     if (!A->structure_only) ap = aa + bs2*ai[row];
1788     rmax = imax[row];
1789     nrow = ailen[row];
1790     low  = 0;
1791     high = nrow;
1792     for (l=0; l<n; l++) { /* loop over added columns */
1793       if (in[l] < 0) continue;
1794       if (PetscUnlikelyDebug(in[l] >= a->nbs)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Block column index too large %D max %D",in[l],a->nbs-1);
1795       col = in[l];
1796       if (!A->structure_only) {
1797         if (roworiented) {
1798           value = v + (k*(stepval+bs) + l)*bs;
1799         } else {
1800           value = v + (l*(stepval+bs) + k)*bs;
1801         }
1802       }
1803       if (col <= lastcol) low = 0;
1804       else high = nrow;
1805       lastcol = col;
1806       while (high-low > 7) {
1807         t = (low+high)/2;
1808         if (rp[t] > col) high = t;
1809         else             low  = t;
1810       }
1811       for (i=low; i<high; i++) {
1812         if (rp[i] > col) break;
1813         if (rp[i] == col) {
1814           if (A->structure_only) goto noinsert2;
1815           bap = ap +  bs2*i;
1816           if (roworiented) {
1817             if (is == ADD_VALUES) {
1818               for (ii=0; ii<bs; ii++,value+=stepval) {
1819                 for (jj=ii; jj<bs2; jj+=bs) {
1820                   bap[jj] += *value++;
1821                 }
1822               }
1823             } else {
1824               for (ii=0; ii<bs; ii++,value+=stepval) {
1825                 for (jj=ii; jj<bs2; jj+=bs) {
1826                   bap[jj] = *value++;
1827                 }
1828               }
1829             }
1830           } else {
1831             if (is == ADD_VALUES) {
1832               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1833                 for (jj=0; jj<bs; jj++) {
1834                   bap[jj] += value[jj];
1835                 }
1836                 bap += bs;
1837               }
1838             } else {
1839               for (ii=0; ii<bs; ii++,value+=bs+stepval) {
1840                 for (jj=0; jj<bs; jj++) {
1841                   bap[jj]  = value[jj];
1842                 }
1843                 bap += bs;
1844               }
1845             }
1846           }
1847           goto noinsert2;
1848         }
1849       }
1850       if (nonew == 1) goto noinsert2;
1851       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new blocked index new nonzero block (%D, %D) in the matrix", row, col);
1852       if (A->structure_only) {
1853         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,row,col,rmax,ai,aj,rp,imax,nonew,MatScalar);
1854       } else {
1855         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,row,col,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
1856       }
1857       N = nrow++ - 1; high++;
1858       /* shift up all the later entries in this row */
1859       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
1860       rp[i] = col;
1861       if (!A->structure_only) {
1862         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
1863         bap   = ap +  bs2*i;
1864         if (roworiented) {
1865           for (ii=0; ii<bs; ii++,value+=stepval) {
1866             for (jj=ii; jj<bs2; jj+=bs) {
1867               bap[jj] = *value++;
1868             }
1869           }
1870         } else {
1871           for (ii=0; ii<bs; ii++,value+=stepval) {
1872             for (jj=0; jj<bs; jj++) {
1873               *bap++ = *value++;
1874             }
1875           }
1876         }
1877       }
1878 noinsert2:;
1879       low = i;
1880     }
1881     ailen[row] = nrow;
1882   }
1883   PetscFunctionReturn(0);
1884 }
1885 
1886 PetscErrorCode MatAssemblyEnd_SeqBAIJ(Mat A,MatAssemblyType mode)
1887 {
1888   Mat_SeqBAIJ    *a     = (Mat_SeqBAIJ*)A->data;
1889   PetscInt       fshift = 0,i,*ai = a->i,*aj = a->j,*imax = a->imax;
1890   PetscInt       m      = A->rmap->N,*ip,N,*ailen = a->ilen;
1891   PetscErrorCode ierr;
1892   PetscInt       mbs  = a->mbs,bs2 = a->bs2,rmax = 0;
1893   MatScalar      *aa  = a->a,*ap;
1894   PetscReal      ratio=0.6;
1895 
1896   PetscFunctionBegin;
1897   if (mode == MAT_FLUSH_ASSEMBLY) PetscFunctionReturn(0);
1898 
1899   if (m) rmax = ailen[0];
1900   for (i=1; i<mbs; i++) {
1901     /* move each row back by the amount of empty slots (fshift) before it*/
1902     fshift += imax[i-1] - ailen[i-1];
1903     rmax    = PetscMax(rmax,ailen[i]);
1904     if (fshift) {
1905       ip = aj + ai[i];
1906       ap = aa + bs2*ai[i];
1907       N  = ailen[i];
1908       ierr = PetscArraymove(ip-fshift,ip,N);CHKERRQ(ierr);
1909       if (!A->structure_only) {
1910         ierr = PetscArraymove(ap-bs2*fshift,ap,bs2*N);CHKERRQ(ierr);
1911       }
1912     }
1913     ai[i] = ai[i-1] + ailen[i-1];
1914   }
1915   if (mbs) {
1916     fshift += imax[mbs-1] - ailen[mbs-1];
1917     ai[mbs] = ai[mbs-1] + ailen[mbs-1];
1918   }
1919 
1920   /* reset ilen and imax for each row */
1921   a->nonzerorowcnt = 0;
1922   if (A->structure_only) {
1923     ierr = PetscFree2(a->imax,a->ilen);CHKERRQ(ierr);
1924   } else { /* !A->structure_only */
1925     for (i=0; i<mbs; i++) {
1926       ailen[i] = imax[i] = ai[i+1] - ai[i];
1927       a->nonzerorowcnt += ((ai[i+1] - ai[i]) > 0);
1928     }
1929   }
1930   a->nz = ai[mbs];
1931 
1932   /* diagonals may have moved, so kill the diagonal pointers */
1933   a->idiagvalid = PETSC_FALSE;
1934   if (fshift && a->diag) {
1935     ierr    = PetscFree(a->diag);CHKERRQ(ierr);
1936     ierr    = PetscLogObjectMemory((PetscObject)A,-(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
1937     a->diag = NULL;
1938   }
1939   if (fshift && a->nounused == -1) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB, "Unused space detected in matrix: %D X %D block size %D, %D unneeded", m, A->cmap->n, A->rmap->bs, fshift*bs2);
1940   ierr = PetscInfo5(A,"Matrix size: %D X %D, block size %D; storage space: %D unneeded, %D used\n",m,A->cmap->n,A->rmap->bs,fshift*bs2,a->nz*bs2);CHKERRQ(ierr);
1941   ierr = PetscInfo1(A,"Number of mallocs during MatSetValues is %D\n",a->reallocs);CHKERRQ(ierr);
1942   ierr = PetscInfo1(A,"Most nonzeros blocks in any row is %D\n",rmax);CHKERRQ(ierr);
1943 
1944   A->info.mallocs    += a->reallocs;
1945   a->reallocs         = 0;
1946   A->info.nz_unneeded = (PetscReal)fshift*bs2;
1947   a->rmax             = rmax;
1948 
1949   if (!A->structure_only) {
1950     ierr = MatCheckCompressedRow(A,a->nonzerorowcnt,&a->compressedrow,a->i,mbs,ratio);CHKERRQ(ierr);
1951   }
1952   PetscFunctionReturn(0);
1953 }
1954 
1955 /*
1956    This function returns an array of flags which indicate the locations of contiguous
1957    blocks that should be zeroed. for eg: if bs = 3  and is = [0,1,2,3,5,6,7,8,9]
1958    then the resulting sizes = [3,1,1,3,1] correspondig to sets [(0,1,2),(3),(5),(6,7,8),(9)]
1959    Assume: sizes should be long enough to hold all the values.
1960 */
1961 static PetscErrorCode MatZeroRows_SeqBAIJ_Check_Blocks(PetscInt idx[],PetscInt n,PetscInt bs,PetscInt sizes[], PetscInt *bs_max)
1962 {
1963   PetscInt  i,j,k,row;
1964   PetscBool flg;
1965 
1966   PetscFunctionBegin;
1967   for (i=0,j=0; i<n; j++) {
1968     row = idx[i];
1969     if (row%bs!=0) { /* Not the begining of a block */
1970       sizes[j] = 1;
1971       i++;
1972     } else if (i+bs > n) { /* complete block doesn't exist (at idx end) */
1973       sizes[j] = 1;         /* Also makes sure atleast 'bs' values exist for next else */
1974       i++;
1975     } else { /* Begining of the block, so check if the complete block exists */
1976       flg = PETSC_TRUE;
1977       for (k=1; k<bs; k++) {
1978         if (row+k != idx[i+k]) { /* break in the block */
1979           flg = PETSC_FALSE;
1980           break;
1981         }
1982       }
1983       if (flg) { /* No break in the bs */
1984         sizes[j] = bs;
1985         i       += bs;
1986       } else {
1987         sizes[j] = 1;
1988         i++;
1989       }
1990     }
1991   }
1992   *bs_max = j;
1993   PetscFunctionReturn(0);
1994 }
1995 
1996 PetscErrorCode MatZeroRows_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
1997 {
1998   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
1999   PetscErrorCode    ierr;
2000   PetscInt          i,j,k,count,*rows;
2001   PetscInt          bs=A->rmap->bs,bs2=baij->bs2,*sizes,row,bs_max;
2002   PetscScalar       zero = 0.0;
2003   MatScalar         *aa;
2004   const PetscScalar *xx;
2005   PetscScalar       *bb;
2006 
2007   PetscFunctionBegin;
2008   /* fix right hand side if needed */
2009   if (x && b) {
2010     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2011     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2012     for (i=0; i<is_n; i++) {
2013       bb[is_idx[i]] = diag*xx[is_idx[i]];
2014     }
2015     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2016     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2017   }
2018 
2019   /* Make a copy of the IS and  sort it */
2020   /* allocate memory for rows,sizes */
2021   ierr = PetscMalloc2(is_n,&rows,2*is_n,&sizes);CHKERRQ(ierr);
2022 
2023   /* copy IS values to rows, and sort them */
2024   for (i=0; i<is_n; i++) rows[i] = is_idx[i];
2025   ierr = PetscSortInt(is_n,rows);CHKERRQ(ierr);
2026 
2027   if (baij->keepnonzeropattern) {
2028     for (i=0; i<is_n; i++) sizes[i] = 1;
2029     bs_max          = is_n;
2030   } else {
2031     ierr = MatZeroRows_SeqBAIJ_Check_Blocks(rows,is_n,bs,sizes,&bs_max);CHKERRQ(ierr);
2032     A->nonzerostate++;
2033   }
2034 
2035   for (i=0,j=0; i<bs_max; j+=sizes[i],i++) {
2036     row = rows[j];
2037     if (row < 0 || row > A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",row);
2038     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2039     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2040     if (sizes[i] == bs && !baij->keepnonzeropattern) {
2041       if (diag != (PetscScalar)0.0) {
2042         if (baij->ilen[row/bs] > 0) {
2043           baij->ilen[row/bs]       = 1;
2044           baij->j[baij->i[row/bs]] = row/bs;
2045 
2046           ierr = PetscArrayzero(aa,count*bs);CHKERRQ(ierr);
2047         }
2048         /* Now insert all the diagonal values for this bs */
2049         for (k=0; k<bs; k++) {
2050           ierr = (*A->ops->setvalues)(A,1,rows+j+k,1,rows+j+k,&diag,INSERT_VALUES);CHKERRQ(ierr);
2051         }
2052       } else { /* (diag == 0.0) */
2053         baij->ilen[row/bs] = 0;
2054       } /* end (diag == 0.0) */
2055     } else { /* (sizes[i] != bs) */
2056       if (PetscUnlikelyDebug(sizes[i] != 1)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Internal Error. Value should be 1");
2057       for (k=0; k<count; k++) {
2058         aa[0] =  zero;
2059         aa   += bs;
2060       }
2061       if (diag != (PetscScalar)0.0) {
2062         ierr = (*A->ops->setvalues)(A,1,rows+j,1,rows+j,&diag,INSERT_VALUES);CHKERRQ(ierr);
2063       }
2064     }
2065   }
2066 
2067   ierr = PetscFree2(rows,sizes);CHKERRQ(ierr);
2068   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 PetscErrorCode MatZeroRowsColumns_SeqBAIJ(Mat A,PetscInt is_n,const PetscInt is_idx[],PetscScalar diag,Vec x, Vec b)
2073 {
2074   Mat_SeqBAIJ       *baij=(Mat_SeqBAIJ*)A->data;
2075   PetscErrorCode    ierr;
2076   PetscInt          i,j,k,count;
2077   PetscInt          bs   =A->rmap->bs,bs2=baij->bs2,row,col;
2078   PetscScalar       zero = 0.0;
2079   MatScalar         *aa;
2080   const PetscScalar *xx;
2081   PetscScalar       *bb;
2082   PetscBool         *zeroed,vecs = PETSC_FALSE;
2083 
2084   PetscFunctionBegin;
2085   /* fix right hand side if needed */
2086   if (x && b) {
2087     ierr = VecGetArrayRead(x,&xx);CHKERRQ(ierr);
2088     ierr = VecGetArray(b,&bb);CHKERRQ(ierr);
2089     vecs = PETSC_TRUE;
2090   }
2091 
2092   /* zero the columns */
2093   ierr = PetscCalloc1(A->rmap->n,&zeroed);CHKERRQ(ierr);
2094   for (i=0; i<is_n; i++) {
2095     if (is_idx[i] < 0 || is_idx[i] >= A->rmap->N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"row %D out of range",is_idx[i]);
2096     zeroed[is_idx[i]] = PETSC_TRUE;
2097   }
2098   for (i=0; i<A->rmap->N; i++) {
2099     if (!zeroed[i]) {
2100       row = i/bs;
2101       for (j=baij->i[row]; j<baij->i[row+1]; j++) {
2102         for (k=0; k<bs; k++) {
2103           col = bs*baij->j[j] + k;
2104           if (zeroed[col]) {
2105             aa = ((MatScalar*)(baij->a)) + j*bs2 + (i%bs) + bs*k;
2106             if (vecs) bb[i] -= aa[0]*xx[col];
2107             aa[0] = 0.0;
2108           }
2109         }
2110       }
2111     } else if (vecs) bb[i] = diag*xx[i];
2112   }
2113   ierr = PetscFree(zeroed);CHKERRQ(ierr);
2114   if (vecs) {
2115     ierr = VecRestoreArrayRead(x,&xx);CHKERRQ(ierr);
2116     ierr = VecRestoreArray(b,&bb);CHKERRQ(ierr);
2117   }
2118 
2119   /* zero the rows */
2120   for (i=0; i<is_n; i++) {
2121     row   = is_idx[i];
2122     count = (baij->i[row/bs +1] - baij->i[row/bs])*bs;
2123     aa    = ((MatScalar*)(baij->a)) + baij->i[row/bs]*bs2 + (row%bs);
2124     for (k=0; k<count; k++) {
2125       aa[0] =  zero;
2126       aa   += bs;
2127     }
2128     if (diag != (PetscScalar)0.0) {
2129       ierr = (*A->ops->setvalues)(A,1,&row,1,&row,&diag,INSERT_VALUES);CHKERRQ(ierr);
2130     }
2131   }
2132   ierr = MatAssemblyEnd_SeqBAIJ(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2133   PetscFunctionReturn(0);
2134 }
2135 
2136 PetscErrorCode MatSetValues_SeqBAIJ(Mat A,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode is)
2137 {
2138   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2139   PetscInt       *rp,k,low,high,t,ii,row,nrow,i,col,l,rmax,N,lastcol = -1;
2140   PetscInt       *imax=a->imax,*ai=a->i,*ailen=a->ilen;
2141   PetscInt       *aj  =a->j,nonew=a->nonew,bs=A->rmap->bs,brow,bcol;
2142   PetscErrorCode ierr;
2143   PetscInt       ridx,cidx,bs2=a->bs2;
2144   PetscBool      roworiented=a->roworiented;
2145   MatScalar      *ap=NULL,value=0.0,*aa=a->a,*bap;
2146 
2147   PetscFunctionBegin;
2148   for (k=0; k<m; k++) { /* loop over added rows */
2149     row  = im[k];
2150     brow = row/bs;
2151     if (row < 0) continue;
2152     if (PetscUnlikelyDebug(row >= A->rmap->N)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",row,A->rmap->N-1);
2153     rp   = aj + ai[brow];
2154     if (!A->structure_only) ap = aa + bs2*ai[brow];
2155     rmax = imax[brow];
2156     nrow = ailen[brow];
2157     low  = 0;
2158     high = nrow;
2159     for (l=0; l<n; l++) { /* loop over added columns */
2160       if (in[l] < 0) continue;
2161       if (PetscUnlikelyDebug(in[l] >= A->cmap->n)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[l],A->cmap->n-1);
2162       col  = in[l]; bcol = col/bs;
2163       ridx = row % bs; cidx = col % bs;
2164       if (!A->structure_only) {
2165         if (roworiented) {
2166           value = v[l + k*n];
2167         } else {
2168           value = v[k + l*m];
2169         }
2170       }
2171       if (col <= lastcol) low = 0; else high = nrow;
2172       lastcol = col;
2173       while (high-low > 7) {
2174         t = (low+high)/2;
2175         if (rp[t] > bcol) high = t;
2176         else              low  = t;
2177       }
2178       for (i=low; i<high; i++) {
2179         if (rp[i] > bcol) break;
2180         if (rp[i] == bcol) {
2181           bap = ap +  bs2*i + bs*cidx + ridx;
2182           if (!A->structure_only) {
2183             if (is == ADD_VALUES) *bap += value;
2184             else                  *bap  = value;
2185           }
2186           goto noinsert1;
2187         }
2188       }
2189       if (nonew == 1) goto noinsert1;
2190       if (nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col);
2191       if (A->structure_only) {
2192         MatSeqXAIJReallocateAIJ_structure_only(A,a->mbs,bs2,nrow,brow,bcol,rmax,ai,aj,rp,imax,nonew,MatScalar);
2193       } else {
2194         MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,imax,nonew,MatScalar);
2195       }
2196       N = nrow++ - 1; high++;
2197       /* shift up all the later entries in this row */
2198       ierr  = PetscArraymove(rp+i+1,rp+i,N-i+1);CHKERRQ(ierr);
2199       rp[i] = bcol;
2200       if (!A->structure_only) {
2201         ierr = PetscArraymove(ap+bs2*(i+1),ap+bs2*i,bs2*(N-i+1));CHKERRQ(ierr);
2202         ierr = PetscArrayzero(ap+bs2*i,bs2);CHKERRQ(ierr);
2203         ap[bs2*i + bs*cidx + ridx] = value;
2204       }
2205       a->nz++;
2206       A->nonzerostate++;
2207 noinsert1:;
2208       low = i;
2209     }
2210     ailen[brow] = nrow;
2211   }
2212   PetscFunctionReturn(0);
2213 }
2214 
2215 PetscErrorCode MatILUFactor_SeqBAIJ(Mat inA,IS row,IS col,const MatFactorInfo *info)
2216 {
2217   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)inA->data;
2218   Mat            outA;
2219   PetscErrorCode ierr;
2220   PetscBool      row_identity,col_identity;
2221 
2222   PetscFunctionBegin;
2223   if (info->levels != 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only levels = 0 supported for in-place ILU");
2224   ierr = ISIdentity(row,&row_identity);CHKERRQ(ierr);
2225   ierr = ISIdentity(col,&col_identity);CHKERRQ(ierr);
2226   if (!row_identity || !col_identity) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Row and column permutations must be identity for in-place ILU");
2227 
2228   outA            = inA;
2229   inA->factortype = MAT_FACTOR_LU;
2230   ierr = PetscFree(inA->solvertype);CHKERRQ(ierr);
2231   ierr = PetscStrallocpy(MATSOLVERPETSC,&inA->solvertype);CHKERRQ(ierr);
2232 
2233   ierr = MatMarkDiagonal_SeqBAIJ(inA);CHKERRQ(ierr);
2234 
2235   ierr   = PetscObjectReference((PetscObject)row);CHKERRQ(ierr);
2236   ierr   = ISDestroy(&a->row);CHKERRQ(ierr);
2237   a->row = row;
2238   ierr   = PetscObjectReference((PetscObject)col);CHKERRQ(ierr);
2239   ierr   = ISDestroy(&a->col);CHKERRQ(ierr);
2240   a->col = col;
2241 
2242   /* Create the invert permutation so that it can be used in MatLUFactorNumeric() */
2243   ierr = ISDestroy(&a->icol);CHKERRQ(ierr);
2244   ierr = ISInvertPermutation(col,PETSC_DECIDE,&a->icol);CHKERRQ(ierr);
2245   ierr = PetscLogObjectParent((PetscObject)inA,(PetscObject)a->icol);CHKERRQ(ierr);
2246 
2247   ierr = MatSeqBAIJSetNumericFactorization_inplace(inA,(PetscBool)(row_identity && col_identity));CHKERRQ(ierr);
2248   if (!a->solve_work) {
2249     ierr = PetscMalloc1(inA->rmap->N+inA->rmap->bs,&a->solve_work);CHKERRQ(ierr);
2250     ierr = PetscLogObjectMemory((PetscObject)inA,(inA->rmap->N+inA->rmap->bs)*sizeof(PetscScalar));CHKERRQ(ierr);
2251   }
2252   ierr = MatLUFactorNumeric(outA,inA,info);CHKERRQ(ierr);
2253   PetscFunctionReturn(0);
2254 }
2255 
2256 PetscErrorCode  MatSeqBAIJSetColumnIndices_SeqBAIJ(Mat mat,PetscInt *indices)
2257 {
2258   Mat_SeqBAIJ *baij = (Mat_SeqBAIJ*)mat->data;
2259   PetscInt    i,nz,mbs;
2260 
2261   PetscFunctionBegin;
2262   nz  = baij->maxnz;
2263   mbs = baij->mbs;
2264   for (i=0; i<nz; i++) {
2265     baij->j[i] = indices[i];
2266   }
2267   baij->nz = nz;
2268   for (i=0; i<mbs; i++) {
2269     baij->ilen[i] = baij->imax[i];
2270   }
2271   PetscFunctionReturn(0);
2272 }
2273 
2274 /*@
2275     MatSeqBAIJSetColumnIndices - Set the column indices for all the rows
2276        in the matrix.
2277 
2278   Input Parameters:
2279 +  mat - the SeqBAIJ matrix
2280 -  indices - the column indices
2281 
2282   Level: advanced
2283 
2284   Notes:
2285     This can be called if you have precomputed the nonzero structure of the
2286   matrix and want to provide it to the matrix object to improve the performance
2287   of the MatSetValues() operation.
2288 
2289     You MUST have set the correct numbers of nonzeros per row in the call to
2290   MatCreateSeqBAIJ(), and the columns indices MUST be sorted.
2291 
2292     MUST be called before any calls to MatSetValues();
2293 
2294 @*/
2295 PetscErrorCode  MatSeqBAIJSetColumnIndices(Mat mat,PetscInt *indices)
2296 {
2297   PetscErrorCode ierr;
2298 
2299   PetscFunctionBegin;
2300   PetscValidHeaderSpecific(mat,MAT_CLASSID,1);
2301   PetscValidPointer(indices,2);
2302   ierr = PetscUseMethod(mat,"MatSeqBAIJSetColumnIndices_C",(Mat,PetscInt*),(mat,indices));CHKERRQ(ierr);
2303   PetscFunctionReturn(0);
2304 }
2305 
2306 PetscErrorCode MatGetRowMaxAbs_SeqBAIJ(Mat A,Vec v,PetscInt idx[])
2307 {
2308   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2309   PetscErrorCode ierr;
2310   PetscInt       i,j,n,row,bs,*ai,*aj,mbs;
2311   PetscReal      atmp;
2312   PetscScalar    *x,zero = 0.0;
2313   MatScalar      *aa;
2314   PetscInt       ncols,brow,krow,kcol;
2315 
2316   PetscFunctionBegin;
2317   if (A->factortype) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Not for factored matrix");
2318   bs  = A->rmap->bs;
2319   aa  = a->a;
2320   ai  = a->i;
2321   aj  = a->j;
2322   mbs = a->mbs;
2323 
2324   ierr = VecSet(v,zero);CHKERRQ(ierr);
2325   ierr = VecGetArray(v,&x);CHKERRQ(ierr);
2326   ierr = VecGetLocalSize(v,&n);CHKERRQ(ierr);
2327   if (n != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Nonconforming matrix and vector");
2328   for (i=0; i<mbs; i++) {
2329     ncols = ai[1] - ai[0]; ai++;
2330     brow  = bs*i;
2331     for (j=0; j<ncols; j++) {
2332       for (kcol=0; kcol<bs; kcol++) {
2333         for (krow=0; krow<bs; krow++) {
2334           atmp = PetscAbsScalar(*aa);aa++;
2335           row  = brow + krow;   /* row index */
2336           if (PetscAbsScalar(x[row]) < atmp) {x[row] = atmp; if (idx) idx[row] = bs*(*aj) + kcol;}
2337         }
2338       }
2339       aj++;
2340     }
2341   }
2342   ierr = VecRestoreArray(v,&x);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 PetscErrorCode MatCopy_SeqBAIJ(Mat A,Mat B,MatStructure str)
2347 {
2348   PetscErrorCode ierr;
2349 
2350   PetscFunctionBegin;
2351   /* If the two matrices have the same copy implementation, use fast copy. */
2352   if (str == SAME_NONZERO_PATTERN && (A->ops->copy == B->ops->copy)) {
2353     Mat_SeqBAIJ *a  = (Mat_SeqBAIJ*)A->data;
2354     Mat_SeqBAIJ *b  = (Mat_SeqBAIJ*)B->data;
2355     PetscInt    ambs=a->mbs,bmbs=b->mbs,abs=A->rmap->bs,bbs=B->rmap->bs,bs2=abs*abs;
2356 
2357     if (a->i[ambs] != b->i[bmbs]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Number of nonzero blocks in matrices A %D and B %D are different",a->i[ambs],b->i[bmbs]);
2358     if (abs != bbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"Block size A %D and B %D are different",abs,bbs);
2359     ierr = PetscArraycpy(b->a,a->a,bs2*a->i[ambs]);CHKERRQ(ierr);
2360     ierr = PetscObjectStateIncrease((PetscObject)B);CHKERRQ(ierr);
2361   } else {
2362     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
2363   }
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 PetscErrorCode MatSetUp_SeqBAIJ(Mat A)
2368 {
2369   PetscErrorCode ierr;
2370 
2371   PetscFunctionBegin;
2372   ierr = MatSeqBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,NULL);CHKERRQ(ierr);
2373   PetscFunctionReturn(0);
2374 }
2375 
2376 static PetscErrorCode MatSeqBAIJGetArray_SeqBAIJ(Mat A,PetscScalar *array[])
2377 {
2378   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2379 
2380   PetscFunctionBegin;
2381   *array = a->a;
2382   PetscFunctionReturn(0);
2383 }
2384 
2385 static PetscErrorCode MatSeqBAIJRestoreArray_SeqBAIJ(Mat A,PetscScalar *array[])
2386 {
2387   PetscFunctionBegin;
2388   *array = NULL;
2389   PetscFunctionReturn(0);
2390 }
2391 
2392 PetscErrorCode MatAXPYGetPreallocation_SeqBAIJ(Mat Y,Mat X,PetscInt *nnz)
2393 {
2394   PetscInt       bs = Y->rmap->bs,mbs = Y->rmap->N/bs;
2395   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data;
2396   Mat_SeqBAIJ    *y = (Mat_SeqBAIJ*)Y->data;
2397   PetscErrorCode ierr;
2398 
2399   PetscFunctionBegin;
2400   /* Set the number of nonzeros in the new matrix */
2401   ierr = MatAXPYGetPreallocation_SeqX_private(mbs,x->i,x->j,y->i,y->j,nnz);CHKERRQ(ierr);
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 PetscErrorCode MatAXPY_SeqBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
2406 {
2407   Mat_SeqBAIJ    *x = (Mat_SeqBAIJ*)X->data,*y = (Mat_SeqBAIJ*)Y->data;
2408   PetscErrorCode ierr;
2409   PetscInt       bs=Y->rmap->bs,bs2=bs*bs;
2410   PetscBLASInt   one=1;
2411 
2412   PetscFunctionBegin;
2413   if (str == SAME_NONZERO_PATTERN) {
2414     PetscScalar  alpha = a;
2415     PetscBLASInt bnz;
2416     ierr = PetscBLASIntCast(x->nz*bs2,&bnz);CHKERRQ(ierr);
2417     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,x->a,&one,y->a,&one));
2418     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
2419   } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
2420     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
2421   } else {
2422     Mat      B;
2423     PetscInt *nnz;
2424     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
2425     ierr = PetscMalloc1(Y->rmap->N,&nnz);CHKERRQ(ierr);
2426     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
2427     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
2428     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
2429     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
2430     ierr = MatSetType(B,(MatType) ((PetscObject)Y)->type_name);CHKERRQ(ierr);
2431     ierr = MatAXPYGetPreallocation_SeqBAIJ(Y,X,nnz);CHKERRQ(ierr);
2432     ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
2433     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
2434     ierr = MatHeaderReplace(Y,&B);CHKERRQ(ierr);
2435     ierr = PetscFree(nnz);CHKERRQ(ierr);
2436   }
2437   PetscFunctionReturn(0);
2438 }
2439 
2440 PETSC_INTERN PetscErrorCode MatConjugate_SeqBAIJ(Mat A)
2441 {
2442 #if defined(PETSC_USE_COMPLEX)
2443   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2444   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2445   MatScalar   *aa = a->a;
2446 
2447   PetscFunctionBegin;
2448   for (i=0; i<nz; i++) aa[i] = PetscConj(aa[i]);
2449 #else
2450   PetscFunctionBegin;
2451 #endif
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 PetscErrorCode MatRealPart_SeqBAIJ(Mat A)
2456 {
2457   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2458   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2459   MatScalar   *aa = a->a;
2460 
2461   PetscFunctionBegin;
2462   for (i=0; i<nz; i++) aa[i] = PetscRealPart(aa[i]);
2463   PetscFunctionReturn(0);
2464 }
2465 
2466 PetscErrorCode MatImaginaryPart_SeqBAIJ(Mat A)
2467 {
2468   Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
2469   PetscInt    i,nz = a->bs2*a->i[a->mbs];
2470   MatScalar   *aa = a->a;
2471 
2472   PetscFunctionBegin;
2473   for (i=0; i<nz; i++) aa[i] = PetscImaginaryPart(aa[i]);
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 /*
2478     Code almost identical to MatGetColumnIJ_SeqAIJ() should share common code
2479 */
2480 PetscErrorCode MatGetColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2481 {
2482   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2483   PetscErrorCode ierr;
2484   PetscInt       bs = A->rmap->bs,i,*collengths,*cia,*cja,n = A->cmap->n/bs,m = A->rmap->n/bs;
2485   PetscInt       nz = a->i[m],row,*jj,mr,col;
2486 
2487   PetscFunctionBegin;
2488   *nn = n;
2489   if (!ia) PetscFunctionReturn(0);
2490   if (symmetric) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not for BAIJ matrices");
2491   else {
2492     ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2493     ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2494     ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2495     jj   = a->j;
2496     for (i=0; i<nz; i++) {
2497       collengths[jj[i]]++;
2498     }
2499     cia[0] = oshift;
2500     for (i=0; i<n; i++) {
2501       cia[i+1] = cia[i] + collengths[i];
2502     }
2503     ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2504     jj   = a->j;
2505     for (row=0; row<m; row++) {
2506       mr = a->i[row+1] - a->i[row];
2507       for (i=0; i<mr; i++) {
2508         col = *jj++;
2509 
2510         cja[cia[col] + collengths[col]++ - oshift] = row + oshift;
2511       }
2512     }
2513     ierr = PetscFree(collengths);CHKERRQ(ierr);
2514     *ia  = cia; *ja = cja;
2515   }
2516   PetscFunctionReturn(0);
2517 }
2518 
2519 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscBool  *done)
2520 {
2521   PetscErrorCode ierr;
2522 
2523   PetscFunctionBegin;
2524   if (!ia) PetscFunctionReturn(0);
2525   ierr = PetscFree(*ia);CHKERRQ(ierr);
2526   ierr = PetscFree(*ja);CHKERRQ(ierr);
2527   PetscFunctionReturn(0);
2528 }
2529 
2530 /*
2531  MatGetColumnIJ_SeqBAIJ_Color() and MatRestoreColumnIJ_SeqBAIJ_Color() are customized from
2532  MatGetColumnIJ_SeqBAIJ() and MatRestoreColumnIJ_SeqBAIJ() by adding an output
2533  spidx[], index of a->a, to be used in MatTransposeColoringCreate() and MatFDColoringCreate()
2534  */
2535 PetscErrorCode MatGetColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *nn,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2536 {
2537   Mat_SeqBAIJ    *a = (Mat_SeqBAIJ*)A->data;
2538   PetscErrorCode ierr;
2539   PetscInt       i,*collengths,*cia,*cja,n=a->nbs,m=a->mbs;
2540   PetscInt       nz = a->i[m],row,*jj,mr,col;
2541   PetscInt       *cspidx;
2542 
2543   PetscFunctionBegin;
2544   *nn = n;
2545   if (!ia) PetscFunctionReturn(0);
2546 
2547   ierr = PetscCalloc1(n,&collengths);CHKERRQ(ierr);
2548   ierr = PetscMalloc1(n+1,&cia);CHKERRQ(ierr);
2549   ierr = PetscMalloc1(nz,&cja);CHKERRQ(ierr);
2550   ierr = PetscMalloc1(nz,&cspidx);CHKERRQ(ierr);
2551   jj   = a->j;
2552   for (i=0; i<nz; i++) {
2553     collengths[jj[i]]++;
2554   }
2555   cia[0] = oshift;
2556   for (i=0; i<n; i++) {
2557     cia[i+1] = cia[i] + collengths[i];
2558   }
2559   ierr = PetscArrayzero(collengths,n);CHKERRQ(ierr);
2560   jj   = a->j;
2561   for (row=0; row<m; row++) {
2562     mr = a->i[row+1] - a->i[row];
2563     for (i=0; i<mr; i++) {
2564       col = *jj++;
2565       cspidx[cia[col] + collengths[col] - oshift] = a->i[row] + i; /* index of a->j */
2566       cja[cia[col] + collengths[col]++ - oshift]  = row + oshift;
2567     }
2568   }
2569   ierr   = PetscFree(collengths);CHKERRQ(ierr);
2570   *ia    = cia;
2571   *ja    = cja;
2572   *spidx = cspidx;
2573   PetscFunctionReturn(0);
2574 }
2575 
2576 PetscErrorCode MatRestoreColumnIJ_SeqBAIJ_Color(Mat A,PetscInt oshift,PetscBool symmetric,PetscBool inodecompressed,PetscInt *n,const PetscInt *ia[],const PetscInt *ja[],PetscInt *spidx[],PetscBool  *done)
2577 {
2578   PetscErrorCode ierr;
2579 
2580   PetscFunctionBegin;
2581   ierr = MatRestoreColumnIJ_SeqBAIJ(A,oshift,symmetric,inodecompressed,n,ia,ja,done);CHKERRQ(ierr);
2582   ierr = PetscFree(*spidx);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 PetscErrorCode MatShift_SeqBAIJ(Mat Y,PetscScalar a)
2587 {
2588   PetscErrorCode ierr;
2589   Mat_SeqBAIJ     *aij = (Mat_SeqBAIJ*)Y->data;
2590 
2591   PetscFunctionBegin;
2592   if (!Y->preallocated || !aij->nz) {
2593     ierr = MatSeqBAIJSetPreallocation(Y,Y->rmap->bs,1,NULL);CHKERRQ(ierr);
2594   }
2595   ierr = MatShift_Basic(Y,a);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 /* -------------------------------------------------------------------*/
2600 static struct _MatOps MatOps_Values = {MatSetValues_SeqBAIJ,
2601                                        MatGetRow_SeqBAIJ,
2602                                        MatRestoreRow_SeqBAIJ,
2603                                        MatMult_SeqBAIJ_N,
2604                                /* 4*/  MatMultAdd_SeqBAIJ_N,
2605                                        MatMultTranspose_SeqBAIJ,
2606                                        MatMultTransposeAdd_SeqBAIJ,
2607                                        NULL,
2608                                        NULL,
2609                                        NULL,
2610                                /* 10*/ NULL,
2611                                        MatLUFactor_SeqBAIJ,
2612                                        NULL,
2613                                        NULL,
2614                                        MatTranspose_SeqBAIJ,
2615                                /* 15*/ MatGetInfo_SeqBAIJ,
2616                                        MatEqual_SeqBAIJ,
2617                                        MatGetDiagonal_SeqBAIJ,
2618                                        MatDiagonalScale_SeqBAIJ,
2619                                        MatNorm_SeqBAIJ,
2620                                /* 20*/ NULL,
2621                                        MatAssemblyEnd_SeqBAIJ,
2622                                        MatSetOption_SeqBAIJ,
2623                                        MatZeroEntries_SeqBAIJ,
2624                                /* 24*/ MatZeroRows_SeqBAIJ,
2625                                        NULL,
2626                                        NULL,
2627                                        NULL,
2628                                        NULL,
2629                                /* 29*/ MatSetUp_SeqBAIJ,
2630                                        NULL,
2631                                        NULL,
2632                                        NULL,
2633                                        NULL,
2634                                /* 34*/ MatDuplicate_SeqBAIJ,
2635                                        NULL,
2636                                        NULL,
2637                                        MatILUFactor_SeqBAIJ,
2638                                        NULL,
2639                                /* 39*/ MatAXPY_SeqBAIJ,
2640                                        MatCreateSubMatrices_SeqBAIJ,
2641                                        MatIncreaseOverlap_SeqBAIJ,
2642                                        MatGetValues_SeqBAIJ,
2643                                        MatCopy_SeqBAIJ,
2644                                /* 44*/ NULL,
2645                                        MatScale_SeqBAIJ,
2646                                        MatShift_SeqBAIJ,
2647                                        NULL,
2648                                        MatZeroRowsColumns_SeqBAIJ,
2649                                /* 49*/ NULL,
2650                                        MatGetRowIJ_SeqBAIJ,
2651                                        MatRestoreRowIJ_SeqBAIJ,
2652                                        MatGetColumnIJ_SeqBAIJ,
2653                                        MatRestoreColumnIJ_SeqBAIJ,
2654                                /* 54*/ MatFDColoringCreate_SeqXAIJ,
2655                                        NULL,
2656                                        NULL,
2657                                        NULL,
2658                                        MatSetValuesBlocked_SeqBAIJ,
2659                                /* 59*/ MatCreateSubMatrix_SeqBAIJ,
2660                                        MatDestroy_SeqBAIJ,
2661                                        MatView_SeqBAIJ,
2662                                        NULL,
2663                                        NULL,
2664                                /* 64*/ NULL,
2665                                        NULL,
2666                                        NULL,
2667                                        NULL,
2668                                        NULL,
2669                                /* 69*/ MatGetRowMaxAbs_SeqBAIJ,
2670                                        NULL,
2671                                        MatConvert_Basic,
2672                                        NULL,
2673                                        NULL,
2674                                /* 74*/ NULL,
2675                                        MatFDColoringApply_BAIJ,
2676                                        NULL,
2677                                        NULL,
2678                                        NULL,
2679                                /* 79*/ NULL,
2680                                        NULL,
2681                                        NULL,
2682                                        NULL,
2683                                        MatLoad_SeqBAIJ,
2684                                /* 84*/ NULL,
2685                                        NULL,
2686                                        NULL,
2687                                        NULL,
2688                                        NULL,
2689                                /* 89*/ NULL,
2690                                        NULL,
2691                                        NULL,
2692                                        NULL,
2693                                        NULL,
2694                                /* 94*/ NULL,
2695                                        NULL,
2696                                        NULL,
2697                                        NULL,
2698                                        NULL,
2699                                /* 99*/ NULL,
2700                                        NULL,
2701                                        NULL,
2702                                        MatConjugate_SeqBAIJ,
2703                                        NULL,
2704                                /*104*/ NULL,
2705                                        MatRealPart_SeqBAIJ,
2706                                        MatImaginaryPart_SeqBAIJ,
2707                                        NULL,
2708                                        NULL,
2709                                /*109*/ NULL,
2710                                        NULL,
2711                                        NULL,
2712                                        NULL,
2713                                        MatMissingDiagonal_SeqBAIJ,
2714                                /*114*/ NULL,
2715                                        NULL,
2716                                        NULL,
2717                                        NULL,
2718                                        NULL,
2719                                /*119*/ NULL,
2720                                        NULL,
2721                                        MatMultHermitianTranspose_SeqBAIJ,
2722                                        MatMultHermitianTransposeAdd_SeqBAIJ,
2723                                        NULL,
2724                                /*124*/ NULL,
2725                                        NULL,
2726                                        MatInvertBlockDiagonal_SeqBAIJ,
2727                                        NULL,
2728                                        NULL,
2729                                /*129*/ NULL,
2730                                        NULL,
2731                                        NULL,
2732                                        NULL,
2733                                        NULL,
2734                                /*134*/ NULL,
2735                                        NULL,
2736                                        NULL,
2737                                        NULL,
2738                                        NULL,
2739                                /*139*/ MatSetBlockSizes_Default,
2740                                        NULL,
2741                                        NULL,
2742                                        MatFDColoringSetUp_SeqXAIJ,
2743                                        NULL,
2744                                 /*144*/MatCreateMPIMatConcatenateSeqMat_SeqBAIJ,
2745                                        MatDestroySubMatrices_SeqBAIJ
2746 };
2747 
2748 PetscErrorCode  MatStoreValues_SeqBAIJ(Mat mat)
2749 {
2750   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2751   PetscInt       nz   = aij->i[aij->mbs]*aij->bs2;
2752   PetscErrorCode ierr;
2753 
2754   PetscFunctionBegin;
2755   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2756 
2757   /* allocate space for values if not already there */
2758   if (!aij->saved_values) {
2759     ierr = PetscMalloc1(nz+1,&aij->saved_values);CHKERRQ(ierr);
2760     ierr = PetscLogObjectMemory((PetscObject)mat,(nz+1)*sizeof(PetscScalar));CHKERRQ(ierr);
2761   }
2762 
2763   /* copy values over */
2764   ierr = PetscArraycpy(aij->saved_values,aij->a,nz);CHKERRQ(ierr);
2765   PetscFunctionReturn(0);
2766 }
2767 
2768 PetscErrorCode  MatRetrieveValues_SeqBAIJ(Mat mat)
2769 {
2770   Mat_SeqBAIJ    *aij = (Mat_SeqBAIJ*)mat->data;
2771   PetscErrorCode ierr;
2772   PetscInt       nz = aij->i[aij->mbs]*aij->bs2;
2773 
2774   PetscFunctionBegin;
2775   if (aij->nonew != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);first");
2776   if (!aij->saved_values) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Must call MatStoreValues(A);first");
2777 
2778   /* copy values over */
2779   ierr = PetscArraycpy(aij->a,aij->saved_values,nz);CHKERRQ(ierr);
2780   PetscFunctionReturn(0);
2781 }
2782 
2783 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType,MatReuse,Mat*);
2784 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqSBAIJ(Mat, MatType,MatReuse,Mat*);
2785 
2786 PetscErrorCode  MatSeqBAIJSetPreallocation_SeqBAIJ(Mat B,PetscInt bs,PetscInt nz,PetscInt *nnz)
2787 {
2788   Mat_SeqBAIJ    *b;
2789   PetscErrorCode ierr;
2790   PetscInt       i,mbs,nbs,bs2;
2791   PetscBool      flg = PETSC_FALSE,skipallocation = PETSC_FALSE,realalloc = PETSC_FALSE;
2792 
2793   PetscFunctionBegin;
2794   if (nz >= 0 || nnz) realalloc = PETSC_TRUE;
2795   if (nz == MAT_SKIP_ALLOCATION) {
2796     skipallocation = PETSC_TRUE;
2797     nz             = 0;
2798   }
2799 
2800   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
2801   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
2802   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
2803   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
2804 
2805   B->preallocated = PETSC_TRUE;
2806 
2807   mbs = B->rmap->n/bs;
2808   nbs = B->cmap->n/bs;
2809   bs2 = bs*bs;
2810 
2811   if (mbs*bs!=B->rmap->n || nbs*bs!=B->cmap->n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number rows %D, cols %D must be divisible by blocksize %D",B->rmap->N,B->cmap->n,bs);
2812 
2813   if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2814   if (nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nz cannot be less than 0: value %D",nz);
2815   if (nnz) {
2816     for (i=0; i<mbs; i++) {
2817       if (nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be less than 0: local row %D value %D",i,nnz[i]);
2818       if (nnz[i] > nbs) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"nnz cannot be greater than block row length: local row %D value %D rowlength %D",i,nnz[i],nbs);
2819     }
2820   }
2821 
2822   b    = (Mat_SeqBAIJ*)B->data;
2823   ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Optimize options for SEQBAIJ matrix 2 ","Mat");CHKERRQ(ierr);
2824   ierr = PetscOptionsBool("-mat_no_unroll","Do not optimize for block size (slow)",NULL,flg,&flg,NULL);CHKERRQ(ierr);
2825   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2826 
2827   if (!flg) {
2828     switch (bs) {
2829     case 1:
2830       B->ops->mult    = MatMult_SeqBAIJ_1;
2831       B->ops->multadd = MatMultAdd_SeqBAIJ_1;
2832       break;
2833     case 2:
2834       B->ops->mult    = MatMult_SeqBAIJ_2;
2835       B->ops->multadd = MatMultAdd_SeqBAIJ_2;
2836       break;
2837     case 3:
2838       B->ops->mult    = MatMult_SeqBAIJ_3;
2839       B->ops->multadd = MatMultAdd_SeqBAIJ_3;
2840       break;
2841     case 4:
2842       B->ops->mult    = MatMult_SeqBAIJ_4;
2843       B->ops->multadd = MatMultAdd_SeqBAIJ_4;
2844       break;
2845     case 5:
2846       B->ops->mult    = MatMult_SeqBAIJ_5;
2847       B->ops->multadd = MatMultAdd_SeqBAIJ_5;
2848       break;
2849     case 6:
2850       B->ops->mult    = MatMult_SeqBAIJ_6;
2851       B->ops->multadd = MatMultAdd_SeqBAIJ_6;
2852       break;
2853     case 7:
2854       B->ops->mult    = MatMult_SeqBAIJ_7;
2855       B->ops->multadd = MatMultAdd_SeqBAIJ_7;
2856       break;
2857     case 9:
2858     {
2859       PetscInt version = 1;
2860       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2861       switch (version) {
2862 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2863       case 1:
2864         B->ops->mult    = MatMult_SeqBAIJ_9_AVX2;
2865         B->ops->multadd = MatMultAdd_SeqBAIJ_9_AVX2;
2866         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2867         break;
2868 #endif
2869       default:
2870         B->ops->mult    = MatMult_SeqBAIJ_N;
2871         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2872         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2873         break;
2874       }
2875       break;
2876     }
2877     case 11:
2878       B->ops->mult    = MatMult_SeqBAIJ_11;
2879       B->ops->multadd = MatMultAdd_SeqBAIJ_11;
2880       break;
2881     case 12:
2882     {
2883       PetscInt version = 1;
2884       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2885       switch (version) {
2886       case 1:
2887         B->ops->mult    = MatMult_SeqBAIJ_12_ver1;
2888         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2889         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2890         break;
2891       case 2:
2892         B->ops->mult    = MatMult_SeqBAIJ_12_ver2;
2893         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver2;
2894         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2895         break;
2896 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
2897       case 3:
2898         B->ops->mult    = MatMult_SeqBAIJ_12_AVX2;
2899         B->ops->multadd = MatMultAdd_SeqBAIJ_12_ver1;
2900         ierr = PetscInfo1((PetscObject)B,"Using AVX2 for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2901         break;
2902 #endif
2903       default:
2904         B->ops->mult    = MatMult_SeqBAIJ_N;
2905         B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2906         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2907         break;
2908       }
2909       break;
2910     }
2911     case 15:
2912     {
2913       PetscInt version = 1;
2914       ierr = PetscOptionsGetInt(NULL,((PetscObject)B)->prefix,"-mat_baij_mult_version",&version,NULL);CHKERRQ(ierr);
2915       switch (version) {
2916       case 1:
2917         B->ops->mult    = MatMult_SeqBAIJ_15_ver1;
2918         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2919         break;
2920       case 2:
2921         B->ops->mult    = MatMult_SeqBAIJ_15_ver2;
2922         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2923         break;
2924       case 3:
2925         B->ops->mult    = MatMult_SeqBAIJ_15_ver3;
2926         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2927         break;
2928       case 4:
2929         B->ops->mult    = MatMult_SeqBAIJ_15_ver4;
2930         ierr = PetscInfo2((PetscObject)B,"Using version %D of MatMult for BAIJ for blocksize %D\n",version,bs);CHKERRQ(ierr);
2931         break;
2932       default:
2933         B->ops->mult    = MatMult_SeqBAIJ_N;
2934         ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2935         break;
2936       }
2937       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2938       break;
2939     }
2940     default:
2941       B->ops->mult    = MatMult_SeqBAIJ_N;
2942       B->ops->multadd = MatMultAdd_SeqBAIJ_N;
2943       ierr = PetscInfo1((PetscObject)B,"Using BLAS for MatMult for BAIJ for blocksize %D\n",bs);CHKERRQ(ierr);
2944       break;
2945     }
2946   }
2947   B->ops->sor = MatSOR_SeqBAIJ;
2948   b->mbs = mbs;
2949   b->nbs = nbs;
2950   if (!skipallocation) {
2951     if (!b->imax) {
2952       ierr = PetscMalloc2(mbs,&b->imax,mbs,&b->ilen);CHKERRQ(ierr);
2953       ierr = PetscLogObjectMemory((PetscObject)B,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
2954 
2955       b->free_imax_ilen = PETSC_TRUE;
2956     }
2957     /* b->ilen will count nonzeros in each block row so far. */
2958     for (i=0; i<mbs; i++) b->ilen[i] = 0;
2959     if (!nnz) {
2960       if (nz == PETSC_DEFAULT || nz == PETSC_DECIDE) nz = 5;
2961       else if (nz < 0) nz = 1;
2962       nz = PetscMin(nz,nbs);
2963       for (i=0; i<mbs; i++) b->imax[i] = nz;
2964       ierr = PetscIntMultError(nz,mbs,&nz);CHKERRQ(ierr);
2965     } else {
2966       PetscInt64 nz64 = 0;
2967       for (i=0; i<mbs; i++) {b->imax[i] = nnz[i]; nz64 += nnz[i];}
2968       ierr = PetscIntCast(nz64,&nz);CHKERRQ(ierr);
2969     }
2970 
2971     /* allocate the matrix space */
2972     ierr = MatSeqXAIJFreeAIJ(B,&b->a,&b->j,&b->i);CHKERRQ(ierr);
2973     if (B->structure_only) {
2974       ierr = PetscMalloc1(nz,&b->j);CHKERRQ(ierr);
2975       ierr = PetscMalloc1(B->rmap->N+1,&b->i);CHKERRQ(ierr);
2976       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*sizeof(PetscInt));CHKERRQ(ierr);
2977     } else {
2978       PetscInt nzbs2 = 0;
2979       ierr = PetscIntMultError(nz,bs2,&nzbs2);CHKERRQ(ierr);
2980       ierr = PetscMalloc3(nzbs2,&b->a,nz,&b->j,B->rmap->N+1,&b->i);CHKERRQ(ierr);
2981       ierr = PetscLogObjectMemory((PetscObject)B,(B->rmap->N+1)*sizeof(PetscInt)+nz*(bs2*sizeof(PetscScalar)+sizeof(PetscInt)));CHKERRQ(ierr);
2982       ierr = PetscArrayzero(b->a,nz*bs2);CHKERRQ(ierr);
2983     }
2984     ierr = PetscArrayzero(b->j,nz);CHKERRQ(ierr);
2985 
2986     if (B->structure_only) {
2987       b->singlemalloc = PETSC_FALSE;
2988       b->free_a       = PETSC_FALSE;
2989     } else {
2990       b->singlemalloc = PETSC_TRUE;
2991       b->free_a       = PETSC_TRUE;
2992     }
2993     b->free_ij = PETSC_TRUE;
2994 
2995     b->i[0] = 0;
2996     for (i=1; i<mbs+1; i++) {
2997       b->i[i] = b->i[i-1] + b->imax[i-1];
2998     }
2999 
3000   } else {
3001     b->free_a  = PETSC_FALSE;
3002     b->free_ij = PETSC_FALSE;
3003   }
3004 
3005   b->bs2              = bs2;
3006   b->mbs              = mbs;
3007   b->nz               = 0;
3008   b->maxnz            = nz;
3009   B->info.nz_unneeded = (PetscReal)b->maxnz*bs2;
3010   B->was_assembled    = PETSC_FALSE;
3011   B->assembled        = PETSC_FALSE;
3012   if (realalloc) {ierr = MatSetOption(B,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);}
3013   PetscFunctionReturn(0);
3014 }
3015 
3016 PetscErrorCode MatSeqBAIJSetPreallocationCSR_SeqBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
3017 {
3018   PetscInt       i,m,nz,nz_max=0,*nnz;
3019   PetscScalar    *values=NULL;
3020   PetscBool      roworiented = ((Mat_SeqBAIJ*)B->data)->roworiented;
3021   PetscErrorCode ierr;
3022 
3023   PetscFunctionBegin;
3024   if (bs < 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
3025   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
3026   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
3027   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
3028   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
3029   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
3030   m    = B->rmap->n/bs;
3031 
3032   if (ii[0] != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %D",ii[0]);
3033   ierr = PetscMalloc1(m+1, &nnz);CHKERRQ(ierr);
3034   for (i=0; i<m; i++) {
3035     nz = ii[i+1]- ii[i];
3036     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE, "Local row %D has a negative number of columns %D",i,nz);
3037     nz_max = PetscMax(nz_max, nz);
3038     nnz[i] = nz;
3039   }
3040   ierr = MatSeqBAIJSetPreallocation(B,bs,0,nnz);CHKERRQ(ierr);
3041   ierr = PetscFree(nnz);CHKERRQ(ierr);
3042 
3043   values = (PetscScalar*)V;
3044   if (!values) {
3045     ierr = PetscCalloc1(bs*bs*(nz_max+1),&values);CHKERRQ(ierr);
3046   }
3047   for (i=0; i<m; i++) {
3048     PetscInt          ncols  = ii[i+1] - ii[i];
3049     const PetscInt    *icols = jj + ii[i];
3050     if (bs == 1 || !roworiented) {
3051       const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
3052       ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
3053     } else {
3054       PetscInt j;
3055       for (j=0; j<ncols; j++) {
3056         const PetscScalar *svals = values + (V ? (bs*bs*(ii[i]+j)) : 0);
3057         ierr = MatSetValuesBlocked_SeqBAIJ(B,1,&i,1,&icols[j],svals,INSERT_VALUES);CHKERRQ(ierr);
3058       }
3059     }
3060   }
3061   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
3062   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3063   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3064   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3065   PetscFunctionReturn(0);
3066 }
3067 
3068 /*@C
3069    MatSeqBAIJGetArray - gives access to the array where the data for a MATSEQBAIJ matrix is stored
3070 
3071    Not Collective
3072 
3073    Input Parameter:
3074 .  mat - a MATSEQBAIJ matrix
3075 
3076    Output Parameter:
3077 .   array - pointer to the data
3078 
3079    Level: intermediate
3080 
3081 .seealso: MatSeqBAIJRestoreArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3082 @*/
3083 PetscErrorCode MatSeqBAIJGetArray(Mat A,PetscScalar **array)
3084 {
3085   PetscErrorCode ierr;
3086 
3087   PetscFunctionBegin;
3088   ierr = PetscUseMethod(A,"MatSeqBAIJGetArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3089   PetscFunctionReturn(0);
3090 }
3091 
3092 /*@C
3093    MatSeqBAIJRestoreArray - returns access to the array where the data for a MATSEQBAIJ matrix is stored obtained by MatSeqBAIJGetArray()
3094 
3095    Not Collective
3096 
3097    Input Parameters:
3098 +  mat - a MATSEQBAIJ matrix
3099 -  array - pointer to the data
3100 
3101    Level: intermediate
3102 
3103 .seealso: MatSeqBAIJGetArray(), MatSeqAIJGetArray(), MatSeqAIJRestoreArray()
3104 @*/
3105 PetscErrorCode MatSeqBAIJRestoreArray(Mat A,PetscScalar **array)
3106 {
3107   PetscErrorCode ierr;
3108 
3109   PetscFunctionBegin;
3110   ierr = PetscUseMethod(A,"MatSeqBAIJRestoreArray_C",(Mat,PetscScalar**),(A,array));CHKERRQ(ierr);
3111   PetscFunctionReturn(0);
3112 }
3113 
3114 /*MC
3115    MATSEQBAIJ - MATSEQBAIJ = "seqbaij" - A matrix type to be used for sequential block sparse matrices, based on
3116    block sparse compressed row format.
3117 
3118    Options Database Keys:
3119 + -mat_type seqbaij - sets the matrix type to "seqbaij" during a call to MatSetFromOptions()
3120 - -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
3121 
3122    Level: beginner
3123 
3124    Notes:
3125     MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type. In this no
3126     space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored
3127 
3128    Run with -info to see what version of the matrix-vector product is being used
3129 
3130 .seealso: MatCreateSeqBAIJ()
3131 M*/
3132 
3133 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqBSTRM(Mat, MatType,MatReuse,Mat*);
3134 
3135 PETSC_EXTERN PetscErrorCode MatCreate_SeqBAIJ(Mat B)
3136 {
3137   PetscErrorCode ierr;
3138   PetscMPIInt    size;
3139   Mat_SeqBAIJ    *b;
3140 
3141   PetscFunctionBegin;
3142   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&size);CHKERRMPI(ierr);
3143   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Comm must be of size 1");
3144 
3145   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
3146   B->data = (void*)b;
3147   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
3148 
3149   b->row          = NULL;
3150   b->col          = NULL;
3151   b->icol         = NULL;
3152   b->reallocs     = 0;
3153   b->saved_values = NULL;
3154 
3155   b->roworiented        = PETSC_TRUE;
3156   b->nonew              = 0;
3157   b->diag               = NULL;
3158   B->spptr              = NULL;
3159   B->info.nz_unneeded   = (PetscReal)b->maxnz*b->bs2;
3160   b->keepnonzeropattern = PETSC_FALSE;
3161 
3162   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJGetArray_C",MatSeqBAIJGetArray_SeqBAIJ);CHKERRQ(ierr);
3163   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJRestoreArray_C",MatSeqBAIJRestoreArray_SeqBAIJ);CHKERRQ(ierr);
3164   ierr = PetscObjectComposeFunction((PetscObject)B,"MatInvertBlockDiagonal_C",MatInvertBlockDiagonal_SeqBAIJ);CHKERRQ(ierr);
3165   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_SeqBAIJ);CHKERRQ(ierr);
3166   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_SeqBAIJ);CHKERRQ(ierr);
3167   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetColumnIndices_C",MatSeqBAIJSetColumnIndices_SeqBAIJ);CHKERRQ(ierr);
3168   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqaij_C",MatConvert_SeqBAIJ_SeqAIJ);CHKERRQ(ierr);
3169   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_seqsbaij_C",MatConvert_SeqBAIJ_SeqSBAIJ);CHKERRQ(ierr);
3170   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocation_C",MatSeqBAIJSetPreallocation_SeqBAIJ);CHKERRQ(ierr);
3171   ierr = PetscObjectComposeFunction((PetscObject)B,"MatSeqBAIJSetPreallocationCSR_C",MatSeqBAIJSetPreallocationCSR_SeqBAIJ);CHKERRQ(ierr);
3172   ierr = PetscObjectComposeFunction((PetscObject)B,"MatIsTranspose_C",MatIsTranspose_SeqBAIJ);CHKERRQ(ierr);
3173 #if defined(PETSC_HAVE_HYPRE)
3174   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_hypre_C",MatConvert_AIJ_HYPRE);CHKERRQ(ierr);
3175 #endif
3176   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_seqbaij_is_C",MatConvert_XAIJ_IS);CHKERRQ(ierr);
3177   ierr = PetscObjectChangeTypeName((PetscObject)B,MATSEQBAIJ);CHKERRQ(ierr);
3178   PetscFunctionReturn(0);
3179 }
3180 
3181 PetscErrorCode MatDuplicateNoCreate_SeqBAIJ(Mat C,Mat A,MatDuplicateOption cpvalues,PetscBool mallocmatspace)
3182 {
3183   Mat_SeqBAIJ    *c = (Mat_SeqBAIJ*)C->data,*a = (Mat_SeqBAIJ*)A->data;
3184   PetscErrorCode ierr;
3185   PetscInt       i,mbs = a->mbs,nz = a->nz,bs2 = a->bs2;
3186 
3187   PetscFunctionBegin;
3188   if (a->i[mbs] != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Corrupt matrix");
3189 
3190   if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3191     c->imax           = a->imax;
3192     c->ilen           = a->ilen;
3193     c->free_imax_ilen = PETSC_FALSE;
3194   } else {
3195     ierr = PetscMalloc2(mbs,&c->imax,mbs,&c->ilen);CHKERRQ(ierr);
3196     ierr = PetscLogObjectMemory((PetscObject)C,2*mbs*sizeof(PetscInt));CHKERRQ(ierr);
3197     for (i=0; i<mbs; i++) {
3198       c->imax[i] = a->imax[i];
3199       c->ilen[i] = a->ilen[i];
3200     }
3201     c->free_imax_ilen = PETSC_TRUE;
3202   }
3203 
3204   /* allocate the matrix space */
3205   if (mallocmatspace) {
3206     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3207       ierr = PetscCalloc1(bs2*nz,&c->a);CHKERRQ(ierr);
3208       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*bs2*sizeof(PetscScalar));CHKERRQ(ierr);
3209 
3210       c->i            = a->i;
3211       c->j            = a->j;
3212       c->singlemalloc = PETSC_FALSE;
3213       c->free_a       = PETSC_TRUE;
3214       c->free_ij      = PETSC_FALSE;
3215       c->parent       = A;
3216       C->preallocated = PETSC_TRUE;
3217       C->assembled    = PETSC_TRUE;
3218 
3219       ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr);
3220       ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3221       ierr = MatSetOption(C,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
3222     } else {
3223       ierr = PetscMalloc3(bs2*nz,&c->a,nz,&c->j,mbs+1,&c->i);CHKERRQ(ierr);
3224       ierr = PetscLogObjectMemory((PetscObject)C,a->i[mbs]*(bs2*sizeof(PetscScalar)+sizeof(PetscInt))+(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3225 
3226       c->singlemalloc = PETSC_TRUE;
3227       c->free_a       = PETSC_TRUE;
3228       c->free_ij      = PETSC_TRUE;
3229 
3230       ierr = PetscArraycpy(c->i,a->i,mbs+1);CHKERRQ(ierr);
3231       if (mbs > 0) {
3232         ierr = PetscArraycpy(c->j,a->j,nz);CHKERRQ(ierr);
3233         if (cpvalues == MAT_COPY_VALUES) {
3234           ierr = PetscArraycpy(c->a,a->a,bs2*nz);CHKERRQ(ierr);
3235         } else {
3236           ierr = PetscArrayzero(c->a,bs2*nz);CHKERRQ(ierr);
3237         }
3238       }
3239       C->preallocated = PETSC_TRUE;
3240       C->assembled    = PETSC_TRUE;
3241     }
3242   }
3243 
3244   c->roworiented = a->roworiented;
3245   c->nonew       = a->nonew;
3246 
3247   ierr = PetscLayoutReference(A->rmap,&C->rmap);CHKERRQ(ierr);
3248   ierr = PetscLayoutReference(A->cmap,&C->cmap);CHKERRQ(ierr);
3249 
3250   c->bs2         = a->bs2;
3251   c->mbs         = a->mbs;
3252   c->nbs         = a->nbs;
3253 
3254   if (a->diag) {
3255     if (cpvalues == MAT_SHARE_NONZERO_PATTERN) {
3256       c->diag      = a->diag;
3257       c->free_diag = PETSC_FALSE;
3258     } else {
3259       ierr = PetscMalloc1(mbs+1,&c->diag);CHKERRQ(ierr);
3260       ierr = PetscLogObjectMemory((PetscObject)C,(mbs+1)*sizeof(PetscInt));CHKERRQ(ierr);
3261       for (i=0; i<mbs; i++) c->diag[i] = a->diag[i];
3262       c->free_diag = PETSC_TRUE;
3263     }
3264   } else c->diag = NULL;
3265 
3266   c->nz         = a->nz;
3267   c->maxnz      = a->nz;         /* Since we allocate exactly the right amount */
3268   c->solve_work = NULL;
3269   c->mult_work  = NULL;
3270   c->sor_workt  = NULL;
3271   c->sor_work   = NULL;
3272 
3273   c->compressedrow.use   = a->compressedrow.use;
3274   c->compressedrow.nrows = a->compressedrow.nrows;
3275   if (a->compressedrow.use) {
3276     i    = a->compressedrow.nrows;
3277     ierr = PetscMalloc2(i+1,&c->compressedrow.i,i+1,&c->compressedrow.rindex);CHKERRQ(ierr);
3278     ierr = PetscLogObjectMemory((PetscObject)C,(2*i+1)*sizeof(PetscInt));CHKERRQ(ierr);
3279     ierr = PetscArraycpy(c->compressedrow.i,a->compressedrow.i,i+1);CHKERRQ(ierr);
3280     ierr = PetscArraycpy(c->compressedrow.rindex,a->compressedrow.rindex,i);CHKERRQ(ierr);
3281   } else {
3282     c->compressedrow.use    = PETSC_FALSE;
3283     c->compressedrow.i      = NULL;
3284     c->compressedrow.rindex = NULL;
3285   }
3286   C->nonzerostate = A->nonzerostate;
3287 
3288   ierr = PetscFunctionListDuplicate(((PetscObject)A)->qlist,&((PetscObject)C)->qlist);CHKERRQ(ierr);
3289   PetscFunctionReturn(0);
3290 }
3291 
3292 PetscErrorCode MatDuplicate_SeqBAIJ(Mat A,MatDuplicateOption cpvalues,Mat *B)
3293 {
3294   PetscErrorCode ierr;
3295 
3296   PetscFunctionBegin;
3297   ierr = MatCreate(PetscObjectComm((PetscObject)A),B);CHKERRQ(ierr);
3298   ierr = MatSetSizes(*B,A->rmap->N,A->cmap->n,A->rmap->N,A->cmap->n);CHKERRQ(ierr);
3299   ierr = MatSetType(*B,MATSEQBAIJ);CHKERRQ(ierr);
3300   ierr = MatDuplicateNoCreate_SeqBAIJ(*B,A,cpvalues,PETSC_TRUE);CHKERRQ(ierr);
3301   PetscFunctionReturn(0);
3302 }
3303 
3304 /* Used for both SeqBAIJ and SeqSBAIJ matrices */
3305 PetscErrorCode MatLoad_SeqBAIJ_Binary(Mat mat,PetscViewer viewer)
3306 {
3307   PetscInt       header[4],M,N,nz,bs,m,n,mbs,nbs,rows,cols,sum,i,j,k;
3308   PetscInt       *rowidxs,*colidxs;
3309   PetscScalar    *matvals;
3310   PetscErrorCode ierr;
3311 
3312   PetscFunctionBegin;
3313   ierr = PetscViewerSetUp(viewer);CHKERRQ(ierr);
3314 
3315   /* read matrix header */
3316   ierr = PetscViewerBinaryRead(viewer,header,4,NULL,PETSC_INT);CHKERRQ(ierr);
3317   if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
3318   M = header[1]; N = header[2]; nz = header[3];
3319   if (M < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix row size (%D) in file is negative",M);
3320   if (N < 0) SETERRQ1(PetscObjectComm((PetscObject)viewer),PETSC_ERR_FILE_UNEXPECTED,"Matrix column size (%D) in file is negative",N);
3321   if (nz < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format on disk, cannot load as SeqBAIJ");
3322 
3323   /* set block sizes from the viewer's .info file */
3324   ierr = MatLoad_Binary_BlockSizes(mat,viewer);CHKERRQ(ierr);
3325   /* set local and global sizes if not set already */
3326   if (mat->rmap->n < 0) mat->rmap->n = M;
3327   if (mat->cmap->n < 0) mat->cmap->n = N;
3328   if (mat->rmap->N < 0) mat->rmap->N = M;
3329   if (mat->cmap->N < 0) mat->cmap->N = N;
3330   ierr = PetscLayoutSetUp(mat->rmap);CHKERRQ(ierr);
3331   ierr = PetscLayoutSetUp(mat->cmap);CHKERRQ(ierr);
3332 
3333   /* check if the matrix sizes are correct */
3334   ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr);
3335   if (M != rows || N != cols) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%D, %D) than the input matrix (%D, %D)",M,N,rows,cols);
3336   ierr = MatGetBlockSize(mat,&bs);CHKERRQ(ierr);
3337   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
3338   mbs = m/bs; nbs = n/bs;
3339 
3340   /* read in row lengths, column indices and nonzero values */
3341   ierr = PetscMalloc1(m+1,&rowidxs);CHKERRQ(ierr);
3342   ierr = PetscViewerBinaryRead(viewer,rowidxs+1,m,NULL,PETSC_INT);CHKERRQ(ierr);
3343   rowidxs[0] = 0; for (i=0; i<m; i++) rowidxs[i+1] += rowidxs[i];
3344   sum = rowidxs[m];
3345   if (sum != nz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent matrix data in file: nonzeros = %D, sum-row-lengths = %D\n",nz,sum);
3346 
3347   /* read in column indices and nonzero values */
3348   ierr = PetscMalloc2(rowidxs[m],&colidxs,nz,&matvals);CHKERRQ(ierr);
3349   ierr = PetscViewerBinaryRead(viewer,colidxs,rowidxs[m],NULL,PETSC_INT);CHKERRQ(ierr);
3350   ierr = PetscViewerBinaryRead(viewer,matvals,rowidxs[m],NULL,PETSC_SCALAR);CHKERRQ(ierr);
3351 
3352   { /* preallocate matrix storage */
3353     PetscBT   bt; /* helper bit set to count nonzeros */
3354     PetscInt  *nnz;
3355     PetscBool sbaij;
3356 
3357     ierr = PetscBTCreate(nbs,&bt);CHKERRQ(ierr);
3358     ierr = PetscCalloc1(mbs,&nnz);CHKERRQ(ierr);
3359     ierr = PetscObjectTypeCompare((PetscObject)mat,MATSEQSBAIJ,&sbaij);CHKERRQ(ierr);
3360     for (i=0; i<mbs; i++) {
3361       ierr = PetscBTMemzero(nbs,bt);CHKERRQ(ierr);
3362       for (k=0; k<bs; k++) {
3363         PetscInt row = bs*i + k;
3364         for (j=rowidxs[row]; j<rowidxs[row+1]; j++) {
3365           PetscInt col = colidxs[j];
3366           if (!sbaij || col >= row)
3367             if (!PetscBTLookupSet(bt,col/bs)) nnz[i]++;
3368         }
3369       }
3370     }
3371     ierr = PetscBTDestroy(&bt);CHKERRQ(ierr);
3372     ierr = MatSeqBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3373     ierr = MatSeqSBAIJSetPreallocation(mat,bs,0,nnz);CHKERRQ(ierr);
3374     ierr = PetscFree(nnz);CHKERRQ(ierr);
3375   }
3376 
3377   /* store matrix values */
3378   for (i=0; i<m; i++) {
3379     PetscInt row = i, s = rowidxs[i], e = rowidxs[i+1];
3380     ierr = (*mat->ops->setvalues)(mat,1,&row,e-s,colidxs+s,matvals+s,INSERT_VALUES);CHKERRQ(ierr);
3381   }
3382 
3383   ierr = PetscFree(rowidxs);CHKERRQ(ierr);
3384   ierr = PetscFree2(colidxs,matvals);CHKERRQ(ierr);
3385   ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3386   ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3387   PetscFunctionReturn(0);
3388 }
3389 
3390 PetscErrorCode MatLoad_SeqBAIJ(Mat mat,PetscViewer viewer)
3391 {
3392   PetscErrorCode ierr;
3393   PetscBool      isbinary;
3394 
3395   PetscFunctionBegin;
3396   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
3397   if (!isbinary) SETERRQ2(PetscObjectComm((PetscObject)viewer),PETSC_ERR_SUP,"Viewer type %s not yet supported for reading %s matrices",((PetscObject)viewer)->type_name,((PetscObject)mat)->type_name);
3398   ierr = MatLoad_SeqBAIJ_Binary(mat,viewer);CHKERRQ(ierr);
3399   PetscFunctionReturn(0);
3400 }
3401 
3402 /*@C
3403    MatCreateSeqBAIJ - Creates a sparse matrix in block AIJ (block
3404    compressed row) format.  For good matrix assembly performance the
3405    user should preallocate the matrix storage by setting the parameter nz
3406    (or the array nnz).  By setting these parameters accurately, performance
3407    during matrix assembly can be increased by more than a factor of 50.
3408 
3409    Collective
3410 
3411    Input Parameters:
3412 +  comm - MPI communicator, set to PETSC_COMM_SELF
3413 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3414           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3415 .  m - number of rows
3416 .  n - number of columns
3417 .  nz - number of nonzero blocks  per block row (same for all rows)
3418 -  nnz - array containing the number of nonzero blocks in the various block rows
3419          (possibly different for each block row) or NULL
3420 
3421    Output Parameter:
3422 .  A - the matrix
3423 
3424    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
3425    MatXXXXSetPreallocation() paradigm instead of this routine directly.
3426    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
3427 
3428    Options Database Keys:
3429 +   -mat_no_unroll - uses code that does not unroll the loops in the
3430                      block calculations (much slower)
3431 -    -mat_block_size - size of the blocks to use
3432 
3433    Level: intermediate
3434 
3435    Notes:
3436    The number of rows and columns must be divisible by blocksize.
3437 
3438    If the nnz parameter is given then the nz parameter is ignored
3439 
3440    A nonzero block is any block that as 1 or more nonzeros in it
3441 
3442    The block AIJ format is fully compatible with standard Fortran 77
3443    storage.  That is, the stored row and column indices can begin at
3444    either one (as in Fortran) or zero.  See the users' manual for details.
3445 
3446    Specify the preallocated storage with either nz or nnz (not both).
3447    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3448    allocation.  See Users-Manual: ch_mat for details.
3449    matrices.
3450 
3451 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ()
3452 @*/
3453 PetscErrorCode  MatCreateSeqBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt nz,const PetscInt nnz[],Mat *A)
3454 {
3455   PetscErrorCode ierr;
3456 
3457   PetscFunctionBegin;
3458   ierr = MatCreate(comm,A);CHKERRQ(ierr);
3459   ierr = MatSetSizes(*A,m,n,m,n);CHKERRQ(ierr);
3460   ierr = MatSetType(*A,MATSEQBAIJ);CHKERRQ(ierr);
3461   ierr = MatSeqBAIJSetPreallocation(*A,bs,nz,(PetscInt*)nnz);CHKERRQ(ierr);
3462   PetscFunctionReturn(0);
3463 }
3464 
3465 /*@C
3466    MatSeqBAIJSetPreallocation - Sets the block size and expected nonzeros
3467    per row in the matrix. For good matrix assembly performance the
3468    user should preallocate the matrix storage by setting the parameter nz
3469    (or the array nnz).  By setting these parameters accurately, performance
3470    during matrix assembly can be increased by more than a factor of 50.
3471 
3472    Collective
3473 
3474    Input Parameters:
3475 +  B - the matrix
3476 .  bs - size of block, the blocks are ALWAYS square. One can use MatSetBlockSizes() to set a different row and column blocksize but the row
3477           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
3478 .  nz - number of block nonzeros per block row (same for all rows)
3479 -  nnz - array containing the number of block nonzeros in the various block rows
3480          (possibly different for each block row) or NULL
3481 
3482    Options Database Keys:
3483 +   -mat_no_unroll - uses code that does not unroll the loops in the
3484                      block calculations (much slower)
3485 -   -mat_block_size - size of the blocks to use
3486 
3487    Level: intermediate
3488 
3489    Notes:
3490    If the nnz parameter is given then the nz parameter is ignored
3491 
3492    You can call MatGetInfo() to get information on how effective the preallocation was;
3493    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
3494    You can also run with the option -info and look for messages with the string
3495    malloc in them to see if additional memory allocation was needed.
3496 
3497    The block AIJ format is fully compatible with standard Fortran 77
3498    storage.  That is, the stored row and column indices can begin at
3499    either one (as in Fortran) or zero.  See the users' manual for details.
3500 
3501    Specify the preallocated storage with either nz or nnz (not both).
3502    Set nz=PETSC_DEFAULT and nnz=NULL for PETSc to control dynamic memory
3503    allocation.  See Users-Manual: ch_mat for details.
3504 
3505 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatCreateBAIJ(), MatGetInfo()
3506 @*/
3507 PetscErrorCode  MatSeqBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt nz,const PetscInt nnz[])
3508 {
3509   PetscErrorCode ierr;
3510 
3511   PetscFunctionBegin;
3512   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3513   PetscValidType(B,1);
3514   PetscValidLogicalCollectiveInt(B,bs,2);
3515   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[]),(B,bs,nz,nnz));CHKERRQ(ierr);
3516   PetscFunctionReturn(0);
3517 }
3518 
3519 /*@C
3520    MatSeqBAIJSetPreallocationCSR - Creates a sparse parallel matrix in BAIJ format using the given nonzero structure and (optional) numerical values
3521 
3522    Collective
3523 
3524    Input Parameters:
3525 +  B - the matrix
3526 .  i - the indices into j for the start of each local row (starts with zero)
3527 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
3528 -  v - optional values in the matrix
3529 
3530    Level: advanced
3531 
3532    Notes:
3533    The order of the entries in values is specified by the MatOption MAT_ROW_ORIENTED.  For example, C programs
3534    may want to use the default MAT_ROW_ORIENTED=PETSC_TRUE and use an array v[nnz][bs][bs] where the second index is
3535    over rows within a block and the last index is over columns within a block row.  Fortran programs will likely set
3536    MAT_ROW_ORIENTED=PETSC_FALSE and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
3537    block column and the second index is over columns within a block.
3538 
3539    Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
3540 
3541 .seealso: MatCreate(), MatCreateSeqBAIJ(), MatSetValues(), MatSeqBAIJSetPreallocation(), MATSEQBAIJ
3542 @*/
3543 PetscErrorCode  MatSeqBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
3544 {
3545   PetscErrorCode ierr;
3546 
3547   PetscFunctionBegin;
3548   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
3549   PetscValidType(B,1);
3550   PetscValidLogicalCollectiveInt(B,bs,2);
3551   ierr = PetscTryMethod(B,"MatSeqBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
3552   PetscFunctionReturn(0);
3553 }
3554 
3555 /*@
3556      MatCreateSeqBAIJWithArrays - Creates an sequential BAIJ matrix using matrix elements provided by the user.
3557 
3558      Collective
3559 
3560    Input Parameters:
3561 +  comm - must be an MPI communicator of size 1
3562 .  bs - size of block
3563 .  m - number of rows
3564 .  n - number of columns
3565 .  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row block row of the matrix
3566 .  j - column indices
3567 -  a - matrix values
3568 
3569    Output Parameter:
3570 .  mat - the matrix
3571 
3572    Level: advanced
3573 
3574    Notes:
3575        The i, j, and a arrays are not copied by this routine, the user must free these arrays
3576     once the matrix is destroyed
3577 
3578        You cannot set new nonzero locations into this matrix, that will generate an error.
3579 
3580        The i and j indices are 0 based
3581 
3582        When block size is greater than 1 the matrix values must be stored using the BAIJ storage format (see the BAIJ code to determine this).
3583 
3584       The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3585       the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3586       block, followed by the second column of the first block etc etc.  That is, the blocks are contiguous in memory
3587       with column-major ordering within blocks.
3588 
3589 .seealso: MatCreate(), MatCreateBAIJ(), MatCreateSeqBAIJ()
3590 
3591 @*/
3592 PetscErrorCode  MatCreateSeqBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt i[],PetscInt j[],PetscScalar a[],Mat *mat)
3593 {
3594   PetscErrorCode ierr;
3595   PetscInt       ii;
3596   Mat_SeqBAIJ    *baij;
3597 
3598   PetscFunctionBegin;
3599   if (bs != 1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"block size %D > 1 is not supported yet",bs);
3600   if (m > 0 && i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
3601 
3602   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
3603   ierr = MatSetSizes(*mat,m,n,m,n);CHKERRQ(ierr);
3604   ierr = MatSetType(*mat,MATSEQBAIJ);CHKERRQ(ierr);
3605   ierr = MatSeqBAIJSetPreallocation(*mat,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
3606   baij = (Mat_SeqBAIJ*)(*mat)->data;
3607   ierr = PetscMalloc2(m,&baij->imax,m,&baij->ilen);CHKERRQ(ierr);
3608   ierr = PetscLogObjectMemory((PetscObject)*mat,2*m*sizeof(PetscInt));CHKERRQ(ierr);
3609 
3610   baij->i = i;
3611   baij->j = j;
3612   baij->a = a;
3613 
3614   baij->singlemalloc = PETSC_FALSE;
3615   baij->nonew        = -1;             /*this indicates that inserting a new value in the matrix that generates a new nonzero is an error*/
3616   baij->free_a       = PETSC_FALSE;
3617   baij->free_ij      = PETSC_FALSE;
3618 
3619   for (ii=0; ii<m; ii++) {
3620     baij->ilen[ii] = baij->imax[ii] = i[ii+1] - i[ii];
3621     if (PetscUnlikelyDebug(i[ii+1] - i[ii] < 0)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row length in i (row indices) row = %d length = %d",ii,i[ii+1] - i[ii]);
3622   }
3623   if (PetscDefined(USE_DEBUG)) {
3624     for (ii=0; ii<baij->i[m]; ii++) {
3625       if (j[ii] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column index at location = %d index = %d",ii,j[ii]);
3626       if (j[ii] > n - 1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column index to large at location = %d index = %d",ii,j[ii]);
3627     }
3628   }
3629 
3630   ierr = MatAssemblyBegin(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3631   ierr = MatAssemblyEnd(*mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
3632   PetscFunctionReturn(0);
3633 }
3634 
3635 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_SeqBAIJ(MPI_Comm comm,Mat inmat,PetscInt n,MatReuse scall,Mat *outmat)
3636 {
3637   PetscErrorCode ierr;
3638   PetscMPIInt    size;
3639 
3640   PetscFunctionBegin;
3641   ierr = MPI_Comm_size(comm,&size);CHKERRMPI(ierr);
3642   if (size == 1 && scall == MAT_REUSE_MATRIX) {
3643     ierr = MatCopy(inmat,*outmat,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
3644   } else {
3645     ierr = MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(comm,inmat,n,scall,outmat);CHKERRQ(ierr);
3646   }
3647   PetscFunctionReturn(0);
3648 }
3649